From b49ebf829fbabfc8e2d634db60c9f75a35e27e97 Mon Sep 17 00:00:00 2001 From: Dumitru Ceara Date: Thu, 2 Mar 2023 13:12:12 -0500 Subject: [PATCH 01/90] cluster-manager: Generate a unique ID (int) for each node. This commit adds a controller - zone cluster controller for managing the zone related node annotations. Upcoming commit will add support for zones. Right now zone cluster controller will allocate a unique id for each node and stores it in the node annotation. Future commits will make use of the node id to - allocate node gateway router port ips - to support interconnect feature Co-authored-by: Numan Siddique Signed-off-by: Dumitru Ceara Signed-off-by: Numan Siddique --- .../pkg/clustermanager/clustermanager.go | 16 +- .../pkg/clustermanager/clustermanager_test.go | 445 ++++++++++++++++++ .../pkg/clustermanager/id_allocator.go | 79 ++++ .../clustermanager/zone_cluster_controller.go | 324 +++++++++++++ go-controller/pkg/util/node_annotations.go | 39 ++ 5 files changed, 902 insertions(+), 1 deletion(-) create mode 100644 go-controller/pkg/clustermanager/id_allocator.go create mode 100644 go-controller/pkg/clustermanager/zone_cluster_controller.go diff --git a/go-controller/pkg/clustermanager/clustermanager.go b/go-controller/pkg/clustermanager/clustermanager.go index 8fec4df666..3e27b71485 100644 --- a/go-controller/pkg/clustermanager/clustermanager.go +++ b/go-controller/pkg/clustermanager/clustermanager.go @@ -2,6 +2,7 @@ package clustermanager import ( "context" + "fmt" "sync" clientset "k8s.io/client-go/kubernetes" @@ -21,6 +22,7 @@ import ( type ClusterManager struct { client clientset.Interface defaultNetClusterController *networkClusterController + zoneClusterController *zoneClusterController wf *factory.WatchFactory wg *sync.WaitGroup secondaryNetClusterManager *secondaryNetworkClusterManager @@ -37,15 +39,22 @@ func NewClusterManager(ovnClient *util.OVNClusterManagerClientset, wf *factory.W identity string, wg *sync.WaitGroup, recorder record.EventRecorder) (*ClusterManager, error) { defaultNetClusterController := newNetworkClusterController(ovntypes.DefaultNetworkName, config.Default.ClusterSubnets, ovnClient, wf, config.HybridOverlay.Enabled, &util.DefaultNetInfo{}, &util.DefaultNetConfInfo{}) + + zoneClusterController, err := newZoneClusterController(ovnClient, wf) + if err != nil { + return nil, fmt.Errorf("failed to create zone cluster controller, err : %w", err) + } + cm := &ClusterManager{ client: ovnClient.KubeClient, defaultNetClusterController: defaultNetClusterController, + zoneClusterController: zoneClusterController, wg: wg, wf: wf, recorder: recorder, identity: identity, } - var err error + if config.OVNKubernetesFeature.EnableMultiNetwork { cm.secondaryNetClusterManager, err = newSecondaryNetworkClusterManager(ovnClient, wf, recorder) if err != nil { @@ -69,6 +78,10 @@ func (cm *ClusterManager) Start(ctx context.Context) error { return err } + if err := cm.zoneClusterController.Start(ctx); err != nil { + return fmt.Errorf("could not start zone controller, err: %w", err) + } + if config.OVNKubernetesFeature.EnableMultiNetwork { if err := cm.secondaryNetClusterManager.Start(); err != nil { return err @@ -82,6 +95,7 @@ func (cm *ClusterManager) Start(ctx context.Context) error { func (cm *ClusterManager) Stop() { klog.Info("Stopping the cluster manager") cm.defaultNetClusterController.Stop() + cm.zoneClusterController.Stop() if config.OVNKubernetesFeature.EnableMultiNetwork { cm.secondaryNetClusterManager.Stop() } diff --git 
a/go-controller/pkg/clustermanager/clustermanager_test.go b/go-controller/pkg/clustermanager/clustermanager_test.go index ef2195086d..adb024d2fb 100644 --- a/go-controller/pkg/clustermanager/clustermanager_test.go +++ b/go-controller/pkg/clustermanager/clustermanager_test.go @@ -2,7 +2,9 @@ package clustermanager import ( "context" + "fmt" "net" + "strconv" "sync" v1 "k8s.io/api/core/v1" @@ -21,6 +23,11 @@ import ( "github.com/urfave/cli/v2" ) +const ( + // ovnNodeIDAnnotaton is the node annotation name used to store the node id. + ovnNodeIDAnnotaton = "k8s.ovn.org/node-id" +) + var _ = ginkgo.Describe("Cluster Manager", func() { var ( app *cli.App @@ -394,4 +401,442 @@ var _ = ginkgo.Describe("Cluster Manager", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) }) + + ginkgo.Context("Node Id allocations", func() { + ginkgo.It("check for node id allocations", func() { + app.Action = func(ctx *cli.Context) error { + nodes := []v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node2", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node3", + }, + }, + } + kubeFakeClient := fake.NewSimpleClientset(&v1.NodeList{ + Items: nodes, + }) + fakeClient := &util.OVNClusterManagerClientset{ + KubeClient: kubeFakeClient, + } + + _, err := config.InitConfig(ctx, nil, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + config.Kubernetes.HostNetworkNamespace = "" + + f, err = factory.NewClusterManagerWatchFactory(fakeClient) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = f.Start() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + clusterManager, err := NewClusterManager(fakeClient, f, "identity", wg, nil) + gomega.Expect(clusterManager).NotTo(gomega.BeNil()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = clusterManager.Start(ctx.Context) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + defer clusterManager.Stop() + + // Check that cluster manager has allocated id for each node + for _, n := range nodes { + gomega.Eventually(func() error { + updatedNode, err := fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), n.Name, metav1.GetOptions{}) + if err != nil { + return err + } + + nodeId, ok := updatedNode.Annotations[ovnNodeIDAnnotaton] + if !ok { + return fmt.Errorf("expected node annotation for node %s to have node id allocated", n.Name) + } + + _, err = strconv.Atoi(nodeId) + if err != nil { + return fmt.Errorf("expected node annotation for node %s to be an integer value, got %s", n.Name, nodeId) + } + return nil + }).ShouldNot(gomega.HaveOccurred()) + } + + return nil + } + + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR, + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("clear the node ids and check", func() { + app.Action = func(ctx *cli.Context) error { + nodes := []v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node2", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node3", + }, + }, + } + kubeFakeClient := fake.NewSimpleClientset(&v1.NodeList{ + Items: nodes, + }) + fakeClient := &util.OVNClusterManagerClientset{ + KubeClient: kubeFakeClient, + } + + _, err := config.InitConfig(ctx, nil, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + config.Kubernetes.HostNetworkNamespace = "" + + f, err = factory.NewClusterManagerWatchFactory(fakeClient) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = f.Start() + 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + clusterManager, err := NewClusterManager(fakeClient, f, "identity", wg, nil) + gomega.Expect(clusterManager).NotTo(gomega.BeNil()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = clusterManager.Start(ctx.Context) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + defer clusterManager.Stop() + + nodeIds := make(map[string]string) + // Check that cluster manager has allocated id for each node before clearing + for _, n := range nodes { + gomega.Eventually(func() error { + updatedNode, err := fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), n.Name, metav1.GetOptions{}) + if err != nil { + return err + } + + nodeId, ok := updatedNode.Annotations[ovnNodeIDAnnotaton] + if !ok { + return fmt.Errorf("expected node annotation for node %s to have node id allocated", n.Name) + } + + _, err = strconv.Atoi(nodeId) + if err != nil { + return fmt.Errorf("expected node annotation for node %s to be an integer value, got %s", n.Name, nodeId) + } + + nodeIds[n.Name] = nodeId + return nil + }).ShouldNot(gomega.HaveOccurred()) + } + + // Clear the node id annotation of nodes and make sure it is reset by cluster manager + // with the same ids. + for _, n := range nodes { + nodeAnnotator := kube.NewNodeAnnotator(&kube.Kube{kubeFakeClient}, n.Name) + + nodeAnnotations := n.Annotations + for k, v := range nodeAnnotations { + nodeAnnotator.Set(k, v) + } + nodeAnnotator.Delete(ovnNodeIDAnnotaton) + err = nodeAnnotator.Run() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + } + + for _, n := range nodes { + gomega.Eventually(func() error { + updatedNode, err := fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), n.Name, metav1.GetOptions{}) + if err != nil { + return err + } + + nodeId, ok := updatedNode.Annotations[ovnNodeIDAnnotaton] + if !ok { + return fmt.Errorf("expected node annotation for node %s to have node id allocated", n.Name) + } + + _, err = strconv.Atoi(nodeId) + if err != nil { + return fmt.Errorf("expected node annotation for node %s to be an integer value, got %s", n.Name, nodeId) + } + + gomega.Expect(nodeId).To(gomega.Equal(nodeIds[n.Name])) + return nil + }).ShouldNot(gomega.HaveOccurred()) + } + + return nil + } + + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR, + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("Stop and start a new cluster manager and verify the node ids", func() { + app.Action = func(ctx *cli.Context) error { + nodes := []v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node2", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node3", + }, + }, + } + kubeFakeClient := fake.NewSimpleClientset(&v1.NodeList{ + Items: nodes, + }) + fakeClient := &util.OVNClusterManagerClientset{ + KubeClient: kubeFakeClient, + } + + _, err := config.InitConfig(ctx, nil, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + config.Kubernetes.HostNetworkNamespace = "" + + f, err = factory.NewClusterManagerWatchFactory(fakeClient) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = f.Start() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + wg1 := &sync.WaitGroup{} + clusterManager, err := NewClusterManager(fakeClient, f, "cm1", wg1, nil) + gomega.Expect(clusterManager).NotTo(gomega.BeNil()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = clusterManager.Start(ctx.Context) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Check that cluster manager has allocated id 
for each node before clearing + nodeIds := make(map[string]string) + for _, n := range nodes { + gomega.Eventually(func() error { + updatedNode, err := fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), n.Name, metav1.GetOptions{}) + if err != nil { + return err + } + + nodeId, ok := updatedNode.Annotations[ovnNodeIDAnnotaton] + if !ok { + return fmt.Errorf("expected node annotation for node %s to have node id allocated", n.Name) + } + + _, err = strconv.Atoi(nodeId) + if err != nil { + return fmt.Errorf("expected node annotation for node %s to be an integer value, got %s", n.Name, nodeId) + } + + nodeIds[n.Name] = nodeId + return nil + }).ShouldNot(gomega.HaveOccurred()) + } + + updatedNodes := []v1.Node{} + for _, n := range nodes { + updatedNode, _ := fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), n.Name, metav1.GetOptions{}) + updatedNodes = append(updatedNodes, *updatedNode) + } + // stop the cluster manager and start a new instance and make sure the node ids are same. + clusterManager.Stop() + wg1.Wait() + + // Close the watch factory and create a new one + f.Shutdown() + kubeFakeClient = fake.NewSimpleClientset(&v1.NodeList{ + Items: updatedNodes, + }) + fakeClient = &util.OVNClusterManagerClientset{ + KubeClient: kubeFakeClient, + } + f, err = factory.NewClusterManagerWatchFactory(fakeClient) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = f.Start() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + cm2, err := NewClusterManager(fakeClient, f, "cm2", wg, nil) + gomega.Expect(cm2).NotTo(gomega.BeNil()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = cm2.Start(ctx.Context) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + defer cm2.Stop() + + for _, n := range nodes { + gomega.Eventually(func() error { + updatedNode, err := fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), n.Name, metav1.GetOptions{}) + if err != nil { + return err + } + + nodeId, ok := updatedNode.Annotations[ovnNodeIDAnnotaton] + if !ok { + return fmt.Errorf("expected node annotation for node %s to have node id allocated", n.Name) + } + + _, err = strconv.Atoi(nodeId) + if err != nil { + return fmt.Errorf("expected node annotation for node %s to be an integer value, got %s", n.Name, nodeId) + } + + gomega.Expect(nodeId).To(gomega.Equal(nodeIds[n.Name])) + return nil + }).ShouldNot(gomega.HaveOccurred()) + } + + return nil + } + + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR, + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("Stop cluster manager, set duplicate id, restart and verify the node ids", func() { + app.Action = func(ctx *cli.Context) error { + nodes := []v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node2", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node3", + }, + }, + } + kubeFakeClient := fake.NewSimpleClientset(&v1.NodeList{ + Items: nodes, + }) + fakeClient := &util.OVNClusterManagerClientset{ + KubeClient: kubeFakeClient, + } + + _, err := config.InitConfig(ctx, nil, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + config.Kubernetes.HostNetworkNamespace = "" + + f, err = factory.NewClusterManagerWatchFactory(fakeClient) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = f.Start() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + wg1 := &sync.WaitGroup{} + clusterManager, err := NewClusterManager(fakeClient, f, "cm1", wg1, nil) + gomega.Expect(clusterManager).NotTo(gomega.BeNil()) + 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = clusterManager.Start(ctx.Context) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + nodeIds := make(map[string]string) + // Check that cluster manager has allocated id for each node before clearing + for _, n := range nodes { + gomega.Eventually(func() error { + updatedNode, err := fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), n.Name, metav1.GetOptions{}) + if err != nil { + return err + } + + nodeId, ok := updatedNode.Annotations[ovnNodeIDAnnotaton] + if !ok { + return fmt.Errorf("expected node annotation for node %s to have node id allocated", n.Name) + } + + _, err = strconv.Atoi(nodeId) + if err != nil { + return fmt.Errorf("expected node annotation for node %s to be an integer value, got %s", n.Name, nodeId) + } + + nodeIds[n.Name] = nodeId + return nil + }).ShouldNot(gomega.HaveOccurred()) + } + + // stop the cluster manager. + clusterManager.Stop() + wg1.Wait() + + updatedNodes := []v1.Node{} + node2, _ := fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), "node2", metav1.GetOptions{}) + for _, n := range nodes { + updatedNode, _ := fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), n.Name, metav1.GetOptions{}) + if updatedNode.Name == "node3" { + // Make the id of node3 duplicate. + updatedNode.Annotations[ovnNodeIDAnnotaton] = node2.Annotations[ovnNodeIDAnnotaton] + } + updatedNodes = append(updatedNodes, *updatedNode) + } + + // Close the watch factory and create a new one + f.Shutdown() + kubeFakeClient = fake.NewSimpleClientset(&v1.NodeList{ + Items: updatedNodes, + }) + fakeClient = &util.OVNClusterManagerClientset{ + KubeClient: kubeFakeClient, + } + f, err = factory.NewClusterManagerWatchFactory(fakeClient) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = f.Start() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Start a new cluster manager + cm2, err := NewClusterManager(fakeClient, f, "cm2", wg, nil) + gomega.Expect(cm2).NotTo(gomega.BeNil()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = cm2.Start(ctx.Context) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + defer cm2.Stop() + + // Get the node ids of node2 and node3 and make sure that they are not equal + gomega.Eventually(func() error { + n2, _ := fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), "node2", metav1.GetOptions{}) + n3, _ := fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), "node3", metav1.GetOptions{}) + n2Id := n2.Annotations[ovnNodeIDAnnotaton] + n3Id := n3.Annotations[ovnNodeIDAnnotaton] + if n2Id == n3Id { + return fmt.Errorf("expected node annotation for node2 and node3 to be not equal, but they are : node id %s", n2Id) + } + return nil + }).ShouldNot(gomega.HaveOccurred()) + + return nil + } + + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR, + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) }) diff --git a/go-controller/pkg/clustermanager/id_allocator.go b/go-controller/pkg/clustermanager/id_allocator.go new file mode 100644 index 0000000000..4de49baabb --- /dev/null +++ b/go-controller/pkg/clustermanager/id_allocator.go @@ -0,0 +1,79 @@ +package clustermanager + +import ( + "fmt" + "sync" + + bitmapallocator "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/ipallocator/allocator" +) + +const ( + invalidID = -1 +) + +// idAllocator is used to allocate id for a resource and store the resource - id in a map +type idAllocator struct { + nameIdMap sync.Map + idBitmap *bitmapallocator.AllocationBitmap +} + +// 
NewIDAllocator returns an IDAllocator +func NewIDAllocator(name string, maxIds int) (*idAllocator, error) { + idBitmap := bitmapallocator.NewContiguousAllocationMap(maxIds, name) + + return &idAllocator{ + nameIdMap: sync.Map{}, + idBitmap: idBitmap, + }, nil +} + +// allocateID allocates an id for the resource 'name' and returns the id. +// If the id for the resource is already allocated, it returns the cached id. +func (idAllocator *idAllocator) allocateID(name string) (int, error) { + // Check the idMap and return the id if its already allocated + v, ok := idAllocator.nameIdMap.Load(name) + if ok { + return v.(int), nil + } + + id, allocated, _ := idAllocator.idBitmap.AllocateNext() + + if !allocated { + return invalidID, fmt.Errorf("failed to allocate the id for the resource %s", name) + } + + idAllocator.nameIdMap.Store(name, id) + return id, nil +} + +// reserveID reserves the id 'id' for the resource 'name'. It returns an +// error if the 'id' is already reserved by a resource other than 'name'. +// It also returns an error if the resource 'name' has a different 'id' +// already reserved. +func (idAllocator *idAllocator) reserveID(name string, id int) error { + v, ok := idAllocator.nameIdMap.Load(name) + if ok { + if v.(int) == id { + // All good. The id is already reserved by the same resource name. + return nil + } + return fmt.Errorf("can't reserve id %d for the resource %s. It is already allocated with a different id %d", id, name, v.(int)) + } + + reserved, _ := idAllocator.idBitmap.Allocate(id) + if !reserved { + return fmt.Errorf("id %d is already reserved by another resource", id) + } + + idAllocator.nameIdMap.Store(name, id) + return nil +} + +// releaseID releases the id allocated for the resource 'name' +func (idAllocator *idAllocator) releaseID(name string) { + v, ok := idAllocator.nameIdMap.Load(name) + if ok { + idAllocator.idBitmap.Release(v.(int)) + idAllocator.nameIdMap.Delete(name) + } +} diff --git a/go-controller/pkg/clustermanager/zone_cluster_controller.go b/go-controller/pkg/clustermanager/zone_cluster_controller.go new file mode 100644 index 0000000000..69b4d7a56a --- /dev/null +++ b/go-controller/pkg/clustermanager/zone_cluster_controller.go @@ -0,0 +1,324 @@ +package clustermanager + +import ( + "context" + "fmt" + "reflect" + "sync" + + corev1 "k8s.io/api/core/v1" + cache "k8s.io/client-go/tools/cache" + "k8s.io/klog/v2" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" + objretry "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/retry" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" +) + +const ( + // Maximum node IDs that can be generated. Limited to maximum nodes supported by k8s. + maxNodeIDs = 5000 +) + +// zoneClusterController is the cluster controller for managing all the zone(s) in the cluster. +type zoneClusterController struct { + kube kube.Interface + watchFactory *factory.WatchFactory + stopChan chan struct{} + wg *sync.WaitGroup + + // node events factory handler + nodeHandler *factory.Handler + + // retry framework for nodes + retryNodes *objretry.RetryFramework + + // ID allocator for the nodes + nodeIDAllocator *idAllocator +} + +func newZoneClusterController(ovnClient *util.OVNClusterManagerClientset, wf *factory.WatchFactory) (*zoneClusterController, error) { + // Since we don't assign 0 to any node, create IDAllocator with one extra element in maxIds. 
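+	// Usable node ids therefore fall in the range [1, maxNodeIDs]; id 0 is reserved just below and never assigned to a node.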
+ nodeIDAllocator, err := NewIDAllocator("NodeIDs", maxNodeIDs+1) + if err != nil { + return nil, fmt.Errorf("failed to create an IdAllocator for the nodes, err: %w", err) + } + + // Reserve the id 0. We don't want to assign this id to any of the nodes. + if err := nodeIDAllocator.reserveID("zero", 0); err != nil { + return nil, fmt.Errorf("idAllocator failed to reserve id 0") + } + + kube := &kube.Kube{ + KClient: ovnClient.KubeClient, + } + wg := &sync.WaitGroup{} + zcc := &zoneClusterController{ + kube: kube, + watchFactory: wf, + stopChan: make(chan struct{}), + wg: wg, + nodeIDAllocator: nodeIDAllocator, + } + + zcc.initRetryFramework() + return zcc, nil +} + +func (zcc *zoneClusterController) initRetryFramework() { + // We are interested in only nodes + resourceHandler := &objretry.ResourceHandler{ + HasUpdateFunc: true, + NeedsUpdateDuringRetry: false, + ObjType: factory.NodeType, + EventHandler: &zoneClusterControllerEventHandler{ + objType: factory.NodeType, + zcc: zcc, + syncFunc: nil, + }, + } + + zcc.retryNodes = objretry.NewRetryFramework(zcc.stopChan, zcc.wg, zcc.watchFactory, resourceHandler) +} + +// Start starts the zone cluster controller to watch the kubernetes nodes +func (zcc *zoneClusterController) Start(ctx context.Context) error { + nodeHandler, err := zcc.retryNodes.WatchResource() + + if err != nil { + return fmt.Errorf("unable to watch nodes: %w", err) + } + + zcc.nodeHandler = nodeHandler + return nil +} + +func (zcc *zoneClusterController) Stop() { + close(zcc.stopChan) + zcc.wg.Wait() + + if zcc.nodeHandler != nil { + zcc.watchFactory.RemoveNodeHandler(zcc.nodeHandler) + } +} + +// handleAddUpdateNodeEvent handles the add or update node event +func (zcc *zoneClusterController) handleAddUpdateNodeEvent(node *corev1.Node) error { + allocatedNodeID, err := zcc.nodeIDAllocator.allocateID(node.Name) + if err != nil { + return fmt.Errorf("failed to allocate an id to the node %s : err - %w", node.Name, err) + } + klog.V(5).Infof("Allocated id %d to the node %s", allocatedNodeID, node.Name) + nodeAnnotations := util.UpdateNodeIDAnnotation(nil, allocatedNodeID) + + return zcc.kube.SetAnnotationsOnNode(node.Name, nodeAnnotations) +} + +// handleAddUpdateNodeEvent handles the delete node event +func (zcc *zoneClusterController) handleDeleteNode(node *corev1.Node) error { + zcc.nodeIDAllocator.releaseID(node.Name) + return nil +} + +func (zcc *zoneClusterController) syncNodes(nodes []interface{}) error { + return zcc.syncNodeIDs(nodes) +} + +func (zcc *zoneClusterController) syncNodeIDs(nodes []interface{}) error { + duplicateIdNodes := []string{} + + for _, nodeObj := range nodes { + node, ok := nodeObj.(*corev1.Node) + if !ok { + return fmt.Errorf("spurious object in syncNodes: %v", nodeObj) + } + + nodeID := util.GetNodeID(node) + if nodeID != util.InvalidNodeID { + klog.Infof("Node %s has the id %d set", node.Name, nodeID) + if err := zcc.nodeIDAllocator.reserveID(node.Name, nodeID); err != nil { + // The id set on this node is duplicate. 
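+				// Collect this node so that a fresh, unique id is allocated for it in the loop below.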
+ klog.Infof("Node %s has a duplicate id %d set", node.Name, nodeID) + duplicateIdNodes = append(duplicateIdNodes, node.Name) + } + } + } + + for i := range duplicateIdNodes { + newNodeID, err := zcc.nodeIDAllocator.allocateID(duplicateIdNodes[i]) + if err != nil { + return fmt.Errorf("failed to allocate id for node %s : err - %w", duplicateIdNodes[i], err) + } else { + klog.Infof("Allocated new id %d for node %q", newNodeID, duplicateIdNodes[i]) + } + } + + return nil +} + +// zoneClusterControllerEventHandler object handles the events +// from retry framework. +type zoneClusterControllerEventHandler struct { + objretry.EventHandler + + objType reflect.Type + zcc *zoneClusterController + syncFunc func([]interface{}) error +} + +// zoneClusterControllerEventHandler functions + +// AddResource adds the specified object to the cluster according to its type and +// returns the error, if any, yielded during object creation. +func (h *zoneClusterControllerEventHandler) AddResource(obj interface{}, fromRetryLoop bool) error { + var err error + + switch h.objType { + case factory.NodeType: + node, ok := obj.(*corev1.Node) + if !ok { + return fmt.Errorf("could not cast %T object to *corev1.Node", obj) + } + if err = h.zcc.handleAddUpdateNodeEvent(node); err != nil { + return fmt.Errorf("node add failed for %s, will try again later: %w", + node.Name, err) + } + default: + return fmt.Errorf("no add function for object type %s", h.objType) + } + return nil +} + +// UpdateResource updates the specified object in the cluster to its version in newObj according +// to its type and returns the error, if any, yielded during the object update. +// The inRetryCache boolean argument is to indicate if the given resource is in the retryCache or not. +func (h *zoneClusterControllerEventHandler) UpdateResource(oldObj, newObj interface{}, inRetryCache bool) error { + var err error + + switch h.objType { + case factory.NodeType: + node, ok := newObj.(*corev1.Node) + if !ok { + return fmt.Errorf("could not cast %T object to *corev1.Node", newObj) + } + if err = h.zcc.handleAddUpdateNodeEvent(node); err != nil { + return fmt.Errorf("node update failed for %s, will try again later: %w", + node.Name, err) + } + default: + return fmt.Errorf("no update function for object type %s", h.objType) + } + return nil +} + +// DeleteResource deletes the object from the cluster according to the delete logic of its resource type. +// cachedObj is the internal cache entry for this object, used for now for pods and network policies. +func (h *zoneClusterControllerEventHandler) DeleteResource(obj, cachedObj interface{}) error { + switch h.objType { + case factory.NodeType: + node, ok := obj.(*corev1.Node) + if !ok { + return fmt.Errorf("could not cast obj of type %T to *knet.Node", obj) + } + return h.zcc.handleDeleteNode(node) + } + return nil +} + +func (h *zoneClusterControllerEventHandler) SyncFunc(objs []interface{}) error { + var syncFunc func([]interface{}) error + + if h.syncFunc != nil { + // syncFunc was provided explicitly + syncFunc = h.syncFunc + } else { + switch h.objType { + case factory.NodeType: + syncFunc = h.zcc.syncNodes + + default: + return fmt.Errorf("no sync function for object type %s", h.objType) + } + } + if syncFunc == nil { + return nil + } + return syncFunc(objs) +} + +// RecordAddEvent records the add event on this object. Not used here. +func (h *zoneClusterControllerEventHandler) RecordAddEvent(obj interface{}) { +} + +// RecordUpdateEvent records the update event on this object. Not used here. 
+func (h *zoneClusterControllerEventHandler) RecordUpdateEvent(obj interface{}) { +} + +// RecordDeleteEvent records the delete event on this object. Not used here. +func (h *zoneClusterControllerEventHandler) RecordDeleteEvent(obj interface{}) { +} + +func (h *zoneClusterControllerEventHandler) RecordSuccessEvent(obj interface{}) { +} + +// RecordErrorEvent records an error event on this object. Not used here. +func (h *zoneClusterControllerEventHandler) RecordErrorEvent(obj interface{}, reason string, err error) { +} + +// isResourceScheduled returns true if the object has been scheduled. Always returns true. +func (h *zoneClusterControllerEventHandler) IsResourceScheduled(obj interface{}) bool { + return true +} + +// IsObjectInTerminalState returns true if the object is a in terminal state. Always returns false. +func (h *zoneClusterControllerEventHandler) IsObjectInTerminalState(obj interface{}) bool { + return false +} + +func (h *zoneClusterControllerEventHandler) AreResourcesEqual(obj1, obj2 interface{}) (bool, error) { + // switch based on type + if h.objType == factory.NodeType { + node1, ok := obj1.(*corev1.Node) + if !ok { + return false, fmt.Errorf("could not cast obj1 of type %T to *corev1.Node", obj1) + } + node2, ok := obj2.(*corev1.Node) + if !ok { + return false, fmt.Errorf("could not cast obj2 of type %T to *corev1.Node", obj2) + } + + // Check if the annotations have changed. + return !util.NodeIDAnnotationChanged(node1, node2), nil + } + + return false, nil +} + +// GetInternalCacheEntry returns the internal cache entry for this object +func (h *zoneClusterControllerEventHandler) GetInternalCacheEntry(obj interface{}) interface{} { + return nil +} + +// getResourceFromInformerCache returns the latest state of the object from the informers cache +// given an object key and its type +func (h *zoneClusterControllerEventHandler) GetResourceFromInformerCache(key string) (interface{}, error) { + var obj interface{} + var name string + var err error + + _, name, err = cache.SplitMetaNamespaceKey(key) + if err != nil { + return nil, fmt.Errorf("failed to split key %s: %w", key, err) + } + + switch h.objType { + case factory.NodeType: + obj, err = h.zcc.watchFactory.GetNode(name) + + default: + err = fmt.Errorf("object type %s not supported, cannot retrieve it from informers cache", + h.objType) + } + return obj, err +} diff --git a/go-controller/pkg/util/node_annotations.go b/go-controller/pkg/util/node_annotations.go index 5e4c1fa254..07f523bba4 100644 --- a/go-controller/pkg/util/node_annotations.go +++ b/go-controller/pkg/util/node_annotations.go @@ -7,6 +7,7 @@ import ( "net" "strconv" + corev1 "k8s.io/api/core/v1" kapi "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/sets" @@ -74,6 +75,12 @@ const ( // capacity for each node. It is set by // openshift/cloud-network-config-controller cloudEgressIPConfigAnnotationKey = "cloud.network.openshift.io/egress-ipconfig" + + // ovnNodeID is the id (of type integer) of a node. It is set by cluster-manager. + ovnNodeID = "k8s.ovn.org/node-id" + + // InvalidNodeID indicates an invalid node id + InvalidNodeID = -1 ) type L3GatewayConfig struct { @@ -560,3 +567,35 @@ func ParseNodeHostAddresses(node *kapi.Node) (sets.Set[string], error) { return sets.New(cfg...), nil } + +// UpdateNodeIDAnnotation updates the ovnNodeID annotation with the node id in the annotations map +// and returns it. 
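+// The id is stored as its decimal string representation, e.g. "k8s.ovn.org/node-id": "5".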
+func UpdateNodeIDAnnotation(annotations map[string]interface{}, nodeID int) map[string]interface{} { + if annotations == nil { + annotations = make(map[string]interface{}) + } + + annotations[ovnNodeID] = strconv.Itoa(nodeID) + return annotations +} + +// GetNodeID returns the id of the node set in the 'ovnNodeID' node annotation. +// Returns InvalidNodeID (-1) if the 'ovnNodeID' node annotation is not set or if the value is +// not an integer value. +func GetNodeID(node *kapi.Node) int { + nodeID, ok := node.Annotations[ovnNodeID] + if !ok { + return InvalidNodeID + } + + id, err := strconv.Atoi(nodeID) + if err != nil { + return InvalidNodeID + } + return id +} + +// NodeIDAnnotationChanged returns true if the ovnNodeID in the corev1.Nodes doesn't match +func NodeIDAnnotationChanged(oldNode, newNode *corev1.Node) bool { + return oldNode.Annotations[ovnNodeID] != newNode.Annotations[ovnNodeID] +} From da27d62335b05139476717d504fa35c0f16dd06b Mon Sep 17 00:00:00 2001 From: Numan Siddique Date: Wed, 22 Jun 2022 16:34:10 -0400 Subject: [PATCH 02/90] ovnkube-node: Set node zone name in the node annotation. This patch adds the concept of "zone" where in an k8s deployment nodes can be grouped into one or more zones. Some of the properties of a zone are: - A zone can have one or more nodes - A node can be part of only one zone. - Each zone becomes an independent OVN deployment i.e OVN DB servers (standalone or raft) and ovn-northd for each zone and ovn-controllers running on each node of a zone connecting to the zone OVN DB servers. For each zone, a deployment should set the zone name in the OVN Northbound database's "name" column of the NB_Global table singleton row and in the "options:name" column of the NB_Global table. Eg. to set the zone name as 'foo' ovn-nbctl set NB_Global . name=foo ovn-nbctl set NB_Global . options:name=foo ovn-northd will copy the options:name from NB_Global to Southbound database SB_Global options. ovnkube-node during startup will read the Southbound database zone name from the Southbound database with the command : "ovn-sbctl get SB_Global . options:name" and stores it in the node annotation - k8s.ovn.org/zone-name. If the zone name is not set in the Southbound database, default value "global" is used. ovnkube network controller manager in the upcoming commits will read this annotation value to figure out if a node belongs to its local zone or to a remote zone. This commit only support single global zone. Upcoming commits will support multiple zones. Signed-off-by: Numan Siddique --- .../node/default_node_network_controller.go | 25 ++++++++++++++++++- go-controller/pkg/types/const.go | 2 ++ go-controller/pkg/util/node_annotations.go | 21 ++++++++++++++++ 3 files changed, 47 insertions(+), 1 deletion(-) diff --git a/go-controller/pkg/node/default_node_network_controller.go b/go-controller/pkg/node/default_node_network_controller.go index 444369a7a7..6fcc665959 100644 --- a/go-controller/pkg/node/default_node_network_controller.go +++ b/go-controller/pkg/node/default_node_network_controller.go @@ -491,6 +491,21 @@ func createNodeManagementPorts(name string, nodeAnnotator kube.Annotator, waiter return mgmtPorts, mgmtPortConfig, nil } +// getOVNSBZone returns the zone name stored in the Southbound db. 
+// It returns the default zone name if "options:name" is not set in the SB_Global row +func getOVNSBZone() (string, error) { + dbZone, stderr, err := util.RunOVNSbctl("get", "SB_Global", ".", "options:name") + if err != nil { + if strings.Contains(stderr, "ovn-sbctl: no key \"name\" in SB_Global record") { + // If the options:name is not present, assume default zone + return types.OvnDefaultZone, nil + } + return "", err + } + + return dbZone, nil +} + // Start learns the subnets assigned to it by the master controller // and calls the SetupNode script which establishes the logical switch func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error { @@ -607,8 +622,16 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error { } } + sbZone, err := getOVNSBZone() + if err != nil { + return fmt.Errorf("failed to get the zone name from the OVN Southbound db server, err : %w", err) + } + if err := util.SetNodeZone(nodeAnnotator, sbZone); err != nil { + return fmt.Errorf("failed to set node zone annotation for node %s: %w", nc.name, err) + } + if err := nodeAnnotator.Run(); err != nil { - return fmt.Errorf("failed to set node %s annotations: %v", nc.name, err) + return fmt.Errorf("failed to set node %s annotations: %w", nc.name, err) } // Wait for management port and gateway resources to be created by the master diff --git a/go-controller/pkg/types/const.go b/go-controller/pkg/types/const.go index bb3990da20..562babc253 100644 --- a/go-controller/pkg/types/const.go +++ b/go-controller/pkg/types/const.go @@ -185,4 +185,6 @@ const ( // db index keys // PrimaryIDKey is used as a primary client index PrimaryIDKey = OvnK8sPrefix + "/id" + + OvnDefaultZone = "global" ) diff --git a/go-controller/pkg/util/node_annotations.go b/go-controller/pkg/util/node_annotations.go index 07f523bba4..1ea5848141 100644 --- a/go-controller/pkg/util/node_annotations.go +++ b/go-controller/pkg/util/node_annotations.go @@ -14,6 +14,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" ) // This handles the annotations used by the node to pass information about its local @@ -76,6 +77,10 @@ const ( // openshift/cloud-network-config-controller cloudEgressIPConfigAnnotationKey = "cloud.network.openshift.io/egress-ipconfig" + // ovnNodeZoneName is the zone to which the node belongs to. It is set by ovnkube-node. + // ovnkube-node gets the node's zone from the OVN Southbound database. + ovnNodeZoneName = "k8s.ovn.org/zone-name" + // ovnNodeID is the id (of type integer) of a node. It is set by cluster-manager. ovnNodeID = "k8s.ovn.org/node-id" @@ -599,3 +604,19 @@ func GetNodeID(node *kapi.Node) int { func NodeIDAnnotationChanged(oldNode, newNode *corev1.Node) bool { return oldNode.Annotations[ovnNodeID] != newNode.Annotations[ovnNodeID] } + +// SetNodeZone sets the node's zone in the 'ovnNodeZoneName' node annotation. +func SetNodeZone(nodeAnnotator kube.Annotator, zoneName string) error { + return nodeAnnotator.Set(ovnNodeZoneName, zoneName) +} + +// GetNodeZone returns the zone of the node set in the 'ovnNodeZoneName' node annotation. +// If the annotation is not set, it returns the 'default' zone name. 
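+// The default zone name is types.OvnDefaultZone ("global").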
+func GetNodeZone(node *kapi.Node) string { + zoneName, ok := node.Annotations[ovnNodeZoneName] + if !ok { + return types.OvnDefaultZone + } + + return zoneName +} From f916361b3990d7f32a698c2559e1648fcfeea01c Mon Sep 17 00:00:00 2001 From: Numan Siddique Date: Sat, 1 Apr 2023 11:36:45 -0400 Subject: [PATCH 03/90] cluster-manager: Allocate node's GW router port addresses. Presently logical switch manager (of default network controller) allocates the addresses for each node gateway router port connecting to the Join logical switch from the config.Gateway.V4JoinSubnet/V6JoinSubnet. In order to support multiple zones, this needs to be centralized in order to have unique addresses across the cluster. Cluster manager now allocates these addresses. It derives the addresses from the V4JoinSubnet/V6JoinSubnet config and the node id and it stores them in the existing node annotation - "k8s.ovn.org/node-gateway-router-lrp-ifaddr". Network controller manager will now read the addresses for the node gateway router port from this node annotation instead. Also 'JoinSwitchIPManager' is no longer needed and is removed from the code base. Note: This commit still doesn't support multiple zones. Upcoming commits will add the support. When multiple zones are supported, each zone's ovn_cluster_router router port connecting to the join logical switch will have the same IP (eg. 100.64.0.1/16). This will be fine though. Signed-off-by: Numan Siddique --- .../pkg/clustermanager/clustermanager_test.go | 311 ++++++++++++++++++ .../pkg/clustermanager/ip_generator.go | 42 +++ .../clustermanager/zone_cluster_controller.go | 69 +++- .../pkg/ovn/default_network_controller.go | 18 +- go-controller/pkg/ovn/hybrid_test.go | 28 +- .../logical_switch_manager.go | 207 ------------ go-controller/pkg/ovn/master.go | 95 +++--- go-controller/pkg/ovn/master_test.go | 34 +- go-controller/pkg/ovn/namespace.go | 2 +- go-controller/pkg/ovn/namespace_test.go | 8 +- go-controller/pkg/ovn/ovn.go | 3 - go-controller/pkg/util/node_annotations.go | 44 ++- 12 files changed, 539 insertions(+), 322 deletions(-) create mode 100644 go-controller/pkg/clustermanager/ip_generator.go diff --git a/go-controller/pkg/clustermanager/clustermanager_test.go b/go-controller/pkg/clustermanager/clustermanager_test.go index adb024d2fb..89e26d3d12 100644 --- a/go-controller/pkg/clustermanager/clustermanager_test.go +++ b/go-controller/pkg/clustermanager/clustermanager_test.go @@ -26,6 +26,9 @@ import ( const ( // ovnNodeIDAnnotaton is the node annotation name used to store the node id. ovnNodeIDAnnotaton = "k8s.ovn.org/node-id" + + // ovnNodeGRLRPAddrAnnotaton is the node annotation name used to store the node gateway router port ips. 
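+	// Its value is a JSON object keyed by IP family, e.g. {"ipv4": "100.64.0.2/16"}.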
+ ovnNodeGRLRPAddrAnnotaton = "k8s.ovn.org/node-gateway-router-lrp-ifaddr" ) var _ = ginkgo.Describe("Cluster Manager", func() { @@ -39,6 +42,7 @@ var _ = ginkgo.Describe("Cluster Manager", func() { const ( clusterIPNet string = "10.1.0.0" clusterCIDR string = clusterIPNet + "/16" + clusterv6CIDR string = "aef0::/48" hybridOverlayClusterCIDR string = "11.1.0.0/16/24" ) @@ -839,4 +843,311 @@ var _ = ginkgo.Describe("Cluster Manager", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) }) + + ginkgo.Context("Node gateway router port IP allocations", func() { + ginkgo.It("verify the node annotations", func() { + app.Action = func(ctx *cli.Context) error { + nodes := []v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node2", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node3", + }, + }, + } + kubeFakeClient := fake.NewSimpleClientset(&v1.NodeList{ + Items: nodes, + }) + fakeClient := &util.OVNClusterManagerClientset{ + KubeClient: kubeFakeClient, + } + + _, err := config.InitConfig(ctx, nil, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + config.Kubernetes.HostNetworkNamespace = "" + + f, err = factory.NewClusterManagerWatchFactory(fakeClient) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = f.Start() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + clusterManager, err := NewClusterManager(fakeClient, f, "identity", wg, nil) + gomega.Expect(clusterManager).NotTo(gomega.BeNil()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = clusterManager.Start(ctx.Context) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + defer clusterManager.Stop() + + // Check that cluster manager has set the node-gateway-router-lrp-ifaddr annotation for each node. 
+ for _, n := range nodes { + gomega.Eventually(func() error { + updatedNode, err := fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), n.Name, metav1.GetOptions{}) + if err != nil { + return err + } + + gwLRPAddrs, err := util.ParseNodeGatewayRouterLRPAddrs(updatedNode) + if err != nil { + return err + } + + gomega.Expect(gwLRPAddrs).NotTo(gomega.BeNil()) + gomega.Expect(len(gwLRPAddrs)).To(gomega.Equal(2)) + return nil + }).ShouldNot(gomega.HaveOccurred()) + } + + return nil + } + + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR + "," + clusterv6CIDR, + "-k8s-service-cidr=10.96.0.0/16,fd00:10:96::/112", + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("clear the node annotations for gateway router port ips and check", func() { + app.Action = func(ctx *cli.Context) error { + nodes := []v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node2", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node3", + }, + }, + } + kubeFakeClient := fake.NewSimpleClientset(&v1.NodeList{ + Items: nodes, + }) + fakeClient := &util.OVNClusterManagerClientset{ + KubeClient: kubeFakeClient, + } + + _, err := config.InitConfig(ctx, nil, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + config.Kubernetes.HostNetworkNamespace = "" + + f, err = factory.NewClusterManagerWatchFactory(fakeClient) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = f.Start() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + clusterManager, err := NewClusterManager(fakeClient, f, "identity", wg, nil) + gomega.Expect(clusterManager).NotTo(gomega.BeNil()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = clusterManager.Start(ctx.Context) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + defer clusterManager.Stop() + + nodeAddrs := make(map[string]string) + // Check that cluster manager has set the node-gateway-router-lrp-ifaddr annotation for each node. + for _, n := range nodes { + gomega.Eventually(func() error { + updatedNode, err := fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), n.Name, metav1.GetOptions{}) + if err != nil { + return err + } + + gwLRPAddrs, err := util.ParseNodeGatewayRouterLRPAddrs(updatedNode) + if err != nil { + return err + } + gomega.Expect(gwLRPAddrs).NotTo(gomega.BeNil()) + gomega.Expect(len(gwLRPAddrs)).To(gomega.Equal(2)) + nodeAddrs[n.Name] = updatedNode.Annotations[ovnNodeGRLRPAddrAnnotaton] + return nil + }).ShouldNot(gomega.HaveOccurred()) + } + + // Clear the node-gateway-router-lrp-ifaddr annotation of nodes and make sure it is reset by cluster manager + // with the same addrs. 
+ for _, n := range nodes { + nodeAnnotator := kube.NewNodeAnnotator(&kube.Kube{kubeFakeClient}, n.Name) + + nodeAnnotations := n.Annotations + for k, v := range nodeAnnotations { + nodeAnnotator.Set(k, v) + } + nodeAnnotator.Delete(ovnNodeGRLRPAddrAnnotaton) + err = nodeAnnotator.Run() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + } + + for _, n := range nodes { + gomega.Eventually(func() error { + updatedNode, err := fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), n.Name, metav1.GetOptions{}) + if err != nil { + return err + } + + nodeGWRPIPs, ok := updatedNode.Annotations[ovnNodeGRLRPAddrAnnotaton] + if !ok { + return fmt.Errorf("expected node annotation for node %s to have node gateway-router-lrp-ifaddr allocated", n.Name) + } + + gomega.Expect(nodeGWRPIPs).To(gomega.Equal(nodeAddrs[n.Name])) + return nil + }).ShouldNot(gomega.HaveOccurred()) + } + return nil + } + + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR + "," + clusterv6CIDR, + "-k8s-service-cidr=10.96.0.0/16,fd00:10:96::/112", + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("Stop cluster manager, change id of a node and verify the gateway router port addr node annotation", func() { + app.Action = func(ctx *cli.Context) error { + nodes := []v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node2", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node3", + }, + }, + } + kubeFakeClient := fake.NewSimpleClientset(&v1.NodeList{ + Items: nodes, + }) + fakeClient := &util.OVNClusterManagerClientset{ + KubeClient: kubeFakeClient, + } + + _, err := config.InitConfig(ctx, nil, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + config.Kubernetes.HostNetworkNamespace = "" + + f, err = factory.NewClusterManagerWatchFactory(fakeClient) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = f.Start() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + wg1 := &sync.WaitGroup{} + clusterManager, err := NewClusterManager(fakeClient, f, "identity", wg1, nil) + gomega.Expect(clusterManager).NotTo(gomega.BeNil()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = clusterManager.Start(ctx.Context) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + node3GWRPAnnotation := "" + // Check that cluster manager has set the node-gateway-router-lrp-ifaddr annotation for each node. + for _, n := range nodes { + gomega.Eventually(func() error { + updatedNode, err := fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), n.Name, metav1.GetOptions{}) + if err != nil { + return err + } + + gwLRPAddrs, err := util.ParseNodeGatewayRouterLRPAddrs(updatedNode) + if err != nil { + return err + } + gomega.Expect(gwLRPAddrs).NotTo(gomega.BeNil()) + gomega.Expect(len(gwLRPAddrs)).To(gomega.Equal(2)) + + // Store the node 3's gw router port addresses + if updatedNode.Name == "node3" { + node3GWRPAnnotation = updatedNode.Annotations[ovnNodeGRLRPAddrAnnotaton] + } + return nil + }).ShouldNot(gomega.HaveOccurred()) + } + + // stop the cluster manager. + clusterManager.Stop() + wg1.Wait() + + updatedNodes := []v1.Node{} + + for _, n := range nodes { + updatedNode, _ := fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), n.Name, metav1.GetOptions{}) + if updatedNode.Name == "node3" { + // Change the id of node3 duplicate. 
+ updatedNode.Annotations[ovnNodeIDAnnotaton] = "50" + } + updatedNodes = append(updatedNodes, *updatedNode) + } + + // Close the watch factory and create a new one + f.Shutdown() + kubeFakeClient = fake.NewSimpleClientset(&v1.NodeList{ + Items: updatedNodes, + }) + fakeClient = &util.OVNClusterManagerClientset{ + KubeClient: kubeFakeClient, + } + f, err = factory.NewClusterManagerWatchFactory(fakeClient) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = f.Start() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Start a new cluster manager + cm2, err := NewClusterManager(fakeClient, f, "cm2", wg, nil) + gomega.Expect(cm2).NotTo(gomega.BeNil()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = cm2.Start(ctx.Context) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + defer cm2.Stop() + + gomega.Eventually(func() error { + updatedNode, err := fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), "node3", metav1.GetOptions{}) + if err != nil { + return err + } + + node3UpdatedGWRPAnnotation := updatedNode.Annotations[ovnNodeGRLRPAddrAnnotaton] + gomega.Expect(node3UpdatedGWRPAnnotation).NotTo(gomega.Equal(node3GWRPAnnotation)) + + gwLRPAddrs, err := util.ParseNodeGatewayRouterLRPAddrs(updatedNode) + if err != nil { + return err + } + gomega.Expect(gwLRPAddrs).NotTo(gomega.BeNil()) + gomega.Expect(len(gwLRPAddrs)).To(gomega.Equal(2)) + return nil + }).ShouldNot(gomega.HaveOccurred()) + return nil + } + + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR + "," + clusterv6CIDR, + "-k8s-service-cidr=10.96.0.0/16,fd00:10:96::/112", + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) }) diff --git a/go-controller/pkg/clustermanager/ip_generator.go b/go-controller/pkg/clustermanager/ip_generator.go new file mode 100644 index 0000000000..5d0c4ca3e4 --- /dev/null +++ b/go-controller/pkg/clustermanager/ip_generator.go @@ -0,0 +1,42 @@ +package clustermanager + +import ( + "fmt" + "math/big" + "net" + + utilnet "k8s.io/utils/net" +) + +// ipGenerator is used to generate an IP from the provided CIDR and the index. +// It is not an allocator and doesn't maintain any cache. +type ipGenerator struct { + netCidr *net.IPNet + netBaseIP *big.Int +} + +// newIPGenerator returns an ipGenerator instance +func newIPGenerator(subnet string) (*ipGenerator, error) { + _, netCidr, err := net.ParseCIDR(subnet) + if err != nil { + return nil, fmt.Errorf("error parsing subnet string %s: %v", subnet, err) + } + + return &ipGenerator{ + netCidr: netCidr, + netBaseIP: utilnet.BigForIP(netCidr.IP), + }, nil +} + +// GenerateIP generates an IP from the base ip and the provided 'idx' +// and returns the IPNet with the generated IP and the netmask of +// cidr. If suppose the subnet was - 168.254.0.0/16 and the specified +// index is 10, it will return IPNet { IP : 168.254.0.10, Mask : 16} +// Returns error if the generated IP is out of network range. 
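+// The offset arithmetic uses utilnet.BigForIP and utilnet.AddIPOffset, so the same generator works for both IPv4 and IPv6 subnets.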
+func (ipGenerator *ipGenerator) GenerateIP(idx int) (*net.IPNet, error) { + ip := utilnet.AddIPOffset(ipGenerator.netBaseIP, idx) + if ipGenerator.netCidr.Contains(ip) { + return &net.IPNet{IP: ip, Mask: ipGenerator.netCidr.Mask}, nil + } + return nil, fmt.Errorf("generated ip %s from the idx %d is out of range in the network %s", ip.String(), idx, ipGenerator.netCidr.String()) +} diff --git a/go-controller/pkg/clustermanager/zone_cluster_controller.go b/go-controller/pkg/clustermanager/zone_cluster_controller.go index 69b4d7a56a..7de1c27791 100644 --- a/go-controller/pkg/clustermanager/zone_cluster_controller.go +++ b/go-controller/pkg/clustermanager/zone_cluster_controller.go @@ -3,6 +3,7 @@ package clustermanager import ( "context" "fmt" + "net" "reflect" "sync" @@ -10,6 +11,7 @@ import ( cache "k8s.io/client-go/tools/cache" "k8s.io/klog/v2" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" objretry "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/retry" @@ -36,6 +38,10 @@ type zoneClusterController struct { // ID allocator for the nodes nodeIDAllocator *idAllocator + + // node gateway router port IP generators (connecting to the join switch) + nodeGWRouterLRPIPv4Generator *ipGenerator + nodeGWRouterLRPIPv6Generator *ipGenerator } func newZoneClusterController(ovnClient *util.OVNClusterManagerClientset, wf *factory.WatchFactory) (*zoneClusterController, error) { @@ -49,17 +55,39 @@ func newZoneClusterController(ovnClient *util.OVNClusterManagerClientset, wf *fa if err := nodeIDAllocator.reserveID("zero", 0); err != nil { return nil, fmt.Errorf("idAllocator failed to reserve id 0") } + if err := nodeIDAllocator.reserveID("one", 1); err != nil { + return nil, fmt.Errorf("idAllocator failed to reserve id 1") + } kube := &kube.Kube{ KClient: ovnClient.KubeClient, } wg := &sync.WaitGroup{} + + var nodeGWRouterLRPIPv4Generator, nodeGWRouterLRPIPv6Generator *ipGenerator + + if config.IPv4Mode { + nodeGWRouterLRPIPv4Generator, err = newIPGenerator(config.Gateway.V4JoinSubnet) + if err != nil { + return nil, fmt.Errorf("error creating IP Generator for v4 join subnet %s: %w", config.Gateway.V4JoinSubnet, err) + } + } + + if config.IPv6Mode { + nodeGWRouterLRPIPv6Generator, err = newIPGenerator(config.Gateway.V6JoinSubnet) + if err != nil { + return nil, fmt.Errorf("error creating IP Generator for v6 join subnet %s: %w", config.Gateway.V6JoinSubnet, err) + } + } + zcc := &zoneClusterController{ - kube: kube, - watchFactory: wf, - stopChan: make(chan struct{}), - wg: wg, - nodeIDAllocator: nodeIDAllocator, + kube: kube, + watchFactory: wf, + stopChan: make(chan struct{}), + wg: wg, + nodeIDAllocator: nodeIDAllocator, + nodeGWRouterLRPIPv4Generator: nodeGWRouterLRPIPv4Generator, + nodeGWRouterLRPIPv6Generator: nodeGWRouterLRPIPv6Generator, } zcc.initRetryFramework() @@ -112,6 +140,29 @@ func (zcc *zoneClusterController) handleAddUpdateNodeEvent(node *corev1.Node) er klog.V(5).Infof("Allocated id %d to the node %s", allocatedNodeID, node.Name) nodeAnnotations := util.UpdateNodeIDAnnotation(nil, allocatedNodeID) + // Allocate the IP address(es) for the node Gateway router port connecting + // to the Join switch + var v4Addr, v6Addr *net.IPNet + if config.IPv4Mode { + v4Addr, err = zcc.nodeGWRouterLRPIPv4Generator.GenerateIP(allocatedNodeID) + if err != nil { + return fmt.Errorf("failed to generate gateway router port IPv4 address for node %s : err - %w", node.Name, 
err) + } + } + + if config.IPv6Mode { + v6Addr, err = zcc.nodeGWRouterLRPIPv6Generator.GenerateIP(allocatedNodeID) + if err != nil { + return fmt.Errorf("failed to generate gateway router port IPv6 address for node %s : err - %w", node.Name, err) + } + } + + nodeAnnotations, err = util.CreateNodeGatewayRouterLRPAddrAnnotation(nodeAnnotations, v4Addr, v6Addr) + if err != nil { + return fmt.Errorf("failed to marshal node %q annotation for Gateway LRP IPs, err : %v", + node.Name, err) + } + return zcc.kube.SetAnnotationsOnNode(node.Name, nodeAnnotations) } @@ -289,7 +340,13 @@ func (h *zoneClusterControllerEventHandler) AreResourcesEqual(obj1, obj2 interfa } // Check if the annotations have changed. - return !util.NodeIDAnnotationChanged(node1, node2), nil + if util.NodeIDAnnotationChanged(node1, node2) { + return false, nil + } + if util.NodeGatewayRouterLRPAddrAnnotationChanged(node1, node2) { + return false, nil + } + return true, nil } return false, nil diff --git a/go-controller/pkg/ovn/default_network_controller.go b/go-controller/pkg/ovn/default_network_controller.go index c45ed62937..add5ea5ec8 100644 --- a/go-controller/pkg/ovn/default_network_controller.go +++ b/go-controller/pkg/ovn/default_network_controller.go @@ -3,6 +3,7 @@ package ovn import ( "context" "fmt" + "net" "reflect" "sync" "time" @@ -25,6 +26,7 @@ import ( lsm "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/logical_switch_manager" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/retry" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/syncmap" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -96,8 +98,6 @@ type DefaultNetworkController struct { egressFirewallDNS *EgressDNS - joinSwIPManager *lsm.JoinSwitchIPManager - // retry framework for egress firewall retryEgressFirewalls *retry.RetryFramework @@ -127,6 +127,10 @@ type DefaultNetworkController struct { // variable to determine if all pods present on the node during startup have been processed // updated atomically allInitialPodsProcessed uint32 + + // IP addresses of OVN Cluster logical router port ("GwRouterToJoinSwitchPrefix + OVNClusterRouter") + // connecting to the join switch + ovnClusterLRPToJoinIfAddrs []*net.IPNet } // NewDefaultNetworkController creates a new OVN controller for creating logical network @@ -189,12 +193,20 @@ func newDefaultNetworkControllerCommon(cnci *CommonNetworkControllerInfo, clusterLoadBalancerGroupUUID: "", switchLoadBalancerGroupUUID: "", routerLoadBalancerGroupUUID: "", - joinSwIPManager: nil, svcController: svcController, svcFactory: svcFactory, egressSvcController: egressSvcController, } + // Allocate IPs for logical router port "GwRouterToJoinSwitchPrefix + OVNClusterRouter". This should always + // allocate the first IPs in the join switch subnets. 
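+	// e.g. 100.64.0.1/16 when the V4 join subnet is 100.64.0.0/16; per-node gateway router ports instead get node-id-derived addresses (such as 100.64.0.2/16 for node id 2) from cluster-manager via the "k8s.ovn.org/node-gateway-router-lrp-ifaddr" annotation.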
+ gwLRPIfAddrs, err := oc.getOVNClusterRouterPortToJoinSwitchIfAddrs() + if err != nil { + return nil, fmt.Errorf("failed to allocate join switch IP address connected to %s: %v", types.OVNClusterRouter, err) + } + + oc.ovnClusterLRPToJoinIfAddrs = gwLRPIfAddrs + oc.initRetryFramework() return oc, nil } diff --git a/go-controller/pkg/ovn/hybrid_test.go b/go-controller/pkg/ovn/hybrid_test.go index 77df209307..b9a63ce8b6 100644 --- a/go-controller/pkg/ovn/hybrid_test.go +++ b/go-controller/pkg/ovn/hybrid_test.go @@ -22,7 +22,6 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdbops" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" - lsm "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/logical_switch_manager" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/sbdb" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -136,8 +135,6 @@ func setupClusterController(clusterController *DefaultNetworkController, cluster clusterController.routerLoadBalancerGroupUUID = routerLBUUID clusterController.defaultCOPPUUID, err = EnsureDefaultCOPP(clusterController.nbClient) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - clusterController.joinSwIPManager, _ = lsm.NewJoinLogicalSwitchIPManager(clusterController.nbClient, expectedNodeSwitchUUID, []string{node1Name}) - } var _ = ginkgo.Describe("Hybrid SDN Master Operations", func() { @@ -297,7 +294,7 @@ var _ = ginkgo.Describe("Hybrid SDN Master Operations", func() { NodeMgmtPortMAC: "0a:58:64:40:00:03", DnatSnatIP: "169.254.0.1", } - testNode := node1.k8sNode() + testNode := node1.k8sNode("2") kubeFakeClient := fake.NewSimpleClientset(&v1.NodeList{ Items: []v1.Node{testNode}, @@ -393,8 +390,6 @@ var _ = ginkgo.Describe("Hybrid SDN Master Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) setupClusterController(clusterController, expectedClusterLBGroup.UUID, expectedSwitchLBGroup.UUID, expectedRouterLBGroup.UUID, expectedNodeSwitch.UUID, node1.Name) - _, _ = clusterController.joinSwIPManager.EnsureJoinLRPIPs(types.OVNClusterRouter) - //assuming all the pods have finished processing atomic.StoreUint32(&clusterController.allInitialPodsProcessed, 1) @@ -585,7 +580,7 @@ var _ = ginkgo.Describe("Hybrid SDN Master Operations", func() { DnatSnatIP: "169.254.0.1", } - testNode := node1.k8sNode() + testNode := node1.k8sNode("2") kubeFakeClient := fake.NewSimpleClientset(&v1.NodeList{ Items: []v1.Node{testNode}, @@ -712,7 +707,6 @@ var _ = ginkgo.Describe("Hybrid SDN Master Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) setupClusterController(clusterController, expectedClusterLBGroup.UUID, expectedSwitchLBGroup.UUID, expectedRouterLBGroup.UUID, expectedNodeSwitch.UUID, node1.Name) - _, _ = clusterController.joinSwIPManager.EnsureJoinLRPIPs(types.OVNClusterRouter) err = clusterController.syncGatewayLogicalNetwork(updatedNode, l3GatewayConfig, []*net.IPNet{subnet}, hostAddrs) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -786,7 +780,7 @@ var _ = ginkgo.Describe("Hybrid SDN Master Operations", func() { NodeMgmtPortMAC: "0a:58:0a:01:01:02", DnatSnatIP: "169.254.0.1", } - testNode := node1.k8sNode() + testNode := node1.k8sNode("2") kubeFakeClient := fake.NewSimpleClientset(&v1.NodeList{ Items: []v1.Node{ @@ -894,8 +888,6 @@ var _ = ginkgo.Describe("Hybrid SDN Master Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) 
setupClusterController(clusterController, expectedClusterLBGroup.UUID, expectedSwitchLBGroup.UUID, expectedRouterLBGroup.UUID, expectedNodeSwitch.UUID, node1.Name) - _, _ = clusterController.joinSwIPManager.EnsureJoinLRPIPs(types.OVNClusterRouter) - //assuming all the pods have finished processing atomic.StoreUint32(&clusterController.allInitialPodsProcessed, 1) // Let the real code run and ensure OVN database sync @@ -1079,7 +1071,7 @@ var _ = ginkgo.Describe("Hybrid SDN Master Operations", func() { NodeMgmtPortMAC: "0a:58:64:40:00:03", DnatSnatIP: "169.254.0.1", } - testNode := node1.k8sNode() + testNode := node1.k8sNode("2") kubeFakeClient := fake.NewSimpleClientset(&v1.NodeList{ Items: []v1.Node{testNode}, @@ -1175,8 +1167,6 @@ var _ = ginkgo.Describe("Hybrid SDN Master Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) setupClusterController(clusterController, expectedClusterLBGroup.UUID, expectedSwitchLBGroup.UUID, expectedRouterLBGroup.UUID, expectedNodeSwitch.UUID, node1.Name) - _, _ = clusterController.joinSwIPManager.EnsureJoinLRPIPs(types.OVNClusterRouter) - //assuming all the pods have finished processing atomic.StoreUint32(&clusterController.allInitialPodsProcessed, 1) // Let the real code run and ensure OVN database sync @@ -1298,8 +1288,12 @@ var _ = ginkgo.Describe("Hybrid SDN Master Operations", func() { NodeMgmtPortMAC: "0a:58:64:40:00:03", DnatSnatIP: "169.254.0.1", } - testNode := node1.k8sNode() - testNode.Annotations = map[string]string{hotypes.HybridOverlayDRIP: nodeHOIP, hotypes.HybridOverlayDRMAC: nodeHOMAC} + testNode := node1.k8sNode("2") + testNode.Annotations = map[string]string{ + hotypes.HybridOverlayDRIP: nodeHOIP, + hotypes.HybridOverlayDRMAC: nodeHOMAC, + "k8s.ovn.org/ovn-node-id": "2", + "k8s.ovn.org/node-gateway-router-lrp-ifaddr": "{\"ipv4\": \"100.64.0.2/16\"}"} kubeFakeClient := fake.NewSimpleClientset(&v1.NodeList{ Items: []v1.Node{testNode}, @@ -1391,8 +1385,6 @@ var _ = ginkgo.Describe("Hybrid SDN Master Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) setupClusterController(clusterController, expectedClusterLBGroup.UUID, expectedSwitchLBGroup.UUID, expectedRouterLBGroup.UUID, expectedNodeSwitch.UUID, node1.Name) - _, _ = clusterController.joinSwIPManager.EnsureJoinLRPIPs(types.OVNClusterRouter) - gomega.Eventually(func() (map[string]string, error) { updatedNode, err := fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), testNode.Name, metav1.GetOptions{}) if err != nil { diff --git a/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go b/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go index 5fd401ab3b..36cfea5844 100644 --- a/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go +++ b/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go @@ -8,12 +8,9 @@ import ( "reflect" "sync" - libovsdbclient "github.com/ovn-org/libovsdb/client" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/ipallocator" ipam "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/ipallocator" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/ipallocator/allocator" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" "k8s.io/klog/v2" utilnet "k8s.io/utils/net" @@ -405,210 +402,6 @@ func (manager *LogicalSwitchManager) ConditionalIPRelease(switchName string, ipn return false, nil } -// IP allocator manager for 
join switch's IPv4 and IPv6 subnets. -type JoinSwitchIPManager struct { - lsm *LogicalSwitchManager - nbClient libovsdbclient.Client - lrpIPCache map[string][]*net.IPNet - lrpIPCacheLock sync.Mutex -} - -// NewJoinIPAMAllocator provides an ipam interface which can be used for join switch IPAM -// allocations for the specified cidr using a contiguous allocation strategy. -func NewJoinIPAMAllocator(cidr *net.IPNet) (ipam.Interface, error) { - subnetRange, err := ipam.NewAllocatorCIDRRange(cidr, func(max int, rangeSpec string) (allocator.Interface, error) { - return allocator.NewContiguousAllocationMap(max, rangeSpec), nil - }) - if err != nil { - return nil, err - } - return subnetRange, nil -} - -// Initializes a new join switch logical switch manager. -// This IPmanager guaranteed to always have both IPv4 and IPv6 regardless of dual-stack -func NewJoinLogicalSwitchIPManager(nbClient libovsdbclient.Client, uuid string, existingNodeNames []string) (*JoinSwitchIPManager, error) { - j := JoinSwitchIPManager{ - lsm: &LogicalSwitchManager{ - cache: make(map[string]logicalSwitchInfo), - ipamFunc: NewJoinIPAMAllocator, - }, - nbClient: nbClient, - lrpIPCache: make(map[string][]*net.IPNet), - } - var joinSubnets []*net.IPNet - joinSubnetsConfig := []string{} - if config.IPv4Mode { - joinSubnetsConfig = append(joinSubnetsConfig, config.Gateway.V4JoinSubnet) - } - if config.IPv6Mode { - joinSubnetsConfig = append(joinSubnetsConfig, config.Gateway.V6JoinSubnet) - } - for _, joinSubnetString := range joinSubnetsConfig { - _, joinSubnet, err := net.ParseCIDR(joinSubnetString) - if err != nil { - return nil, fmt.Errorf("error parsing join subnet string %s: %v", joinSubnetString, err) - } - joinSubnets = append(joinSubnets, joinSubnet) - } - err := j.lsm.AddSwitch(types.OVNJoinSwitch, uuid, joinSubnets) - if err != nil { - return nil, err - } - for _, nodeName := range existingNodeNames { - gwLRPIPs := j.getJoinLRPAddresses(nodeName) - if len(gwLRPIPs) > 0 { - klog.Infof("Initializing and reserving the join switch IP for node: %s to: %v", nodeName, gwLRPIPs) - if err := j.reserveJoinLRPIPs(nodeName, gwLRPIPs); err != nil { - return nil, fmt.Errorf("error initiliazing and reserving the join switch IP for node: %s, err: %v", nodeName, err) - } - } - } - return &j, nil -} - -func (jsIPManager *JoinSwitchIPManager) getJoinLRPCacheIPs(nodeName string) ([]*net.IPNet, bool) { - gwLRPIPs, ok := jsIPManager.lrpIPCache[nodeName] - return gwLRPIPs, ok -} - -func sameIPs(a, b []*net.IPNet) bool { - if len(a) != len(b) { - return false - } - for _, aip := range a { - found := false - for _, bip := range b { - if aip.String() == bip.String() { - found = true - break - } - } - if !found { - return false - } - } - return true -} - -func (jsIPManager *JoinSwitchIPManager) setJoinLRPCacheIPs(nodeName string, gwLRPIPs []*net.IPNet) error { - if oldIPs, ok := jsIPManager.lrpIPCache[nodeName]; ok && !sameIPs(oldIPs, gwLRPIPs) { - return fmt.Errorf("join switch IPs %v already cached", oldIPs) - } - jsIPManager.lrpIPCache[nodeName] = gwLRPIPs - return nil -} - -func (jsIPManager *JoinSwitchIPManager) delJoinLRPCacheIPs(nodeName string) { - delete(jsIPManager.lrpIPCache, nodeName) -} - -// reserveJoinLRPIPs tries to add the LRP IPs to the joinSwitchIPManager, then they will be stored in the cache; -func (jsIPManager *JoinSwitchIPManager) reserveJoinLRPIPs(nodeName string, gwLRPIPs []*net.IPNet) error { - // reserve the given IP in the allocator - if err := jsIPManager.lsm.AllocateIPs(types.OVNJoinSwitch, gwLRPIPs); err != nil { 
- return err - } - - // store the allocated IPs in the cache if possible - if err := jsIPManager.setJoinLRPCacheIPs(nodeName, gwLRPIPs); err != nil { - // if storing the IPs to the cache fails, release the IPs again and return the error - klog.Errorf("Failed to add node %s reserved IPs %v to the join switch IP cache: %s", nodeName, gwLRPIPs, err.Error()) - if relErr := jsIPManager.lsm.ReleaseIPs(types.OVNJoinSwitch, gwLRPIPs); relErr != nil { - klog.Errorf("Failed to release logical router port IPs %v just reserved for node %s: %q", - util.JoinIPNetIPs(gwLRPIPs, " "), nodeName, relErr) - } - return err - } - - return nil -} - -// ensureJoinLRPIPs tries to allocate the LRP IPs if it is not yet allocated, then they will be stored in the cache -func (jsIPManager *JoinSwitchIPManager) EnsureJoinLRPIPs(nodeName string) (gwLRPIPs []*net.IPNet, err error) { - jsIPManager.lrpIPCacheLock.Lock() - defer jsIPManager.lrpIPCacheLock.Unlock() - // first check the IP cache, return if an entry already exists - gwLRPIPs, ok := jsIPManager.getJoinLRPCacheIPs(nodeName) - if ok { - return gwLRPIPs, nil - } - // second check the running DB - gwLRPIPs = jsIPManager.getJoinLRPAddresses(nodeName) - if len(gwLRPIPs) > 0 { - // Saving the hit in the cache - err = jsIPManager.reserveJoinLRPIPs(nodeName, gwLRPIPs) - if err != nil { - klog.Errorf("Failed to add reserve IPs to the join switch IP cache: %s", err.Error()) - return nil, err - } - return gwLRPIPs, nil - } - gwLRPIPs, err = jsIPManager.lsm.AllocateNextIPs(types.OVNJoinSwitch) - if err != nil { - return nil, err - } - - defer func() { - if err != nil { - if relErr := jsIPManager.lsm.ReleaseIPs(types.OVNJoinSwitch, gwLRPIPs); relErr != nil { - klog.Errorf("Failed to release logical router port IPs %v for node %s: %q", - util.JoinIPNetIPs(gwLRPIPs, " "), nodeName, relErr) - } - } - }() - - if err = jsIPManager.setJoinLRPCacheIPs(nodeName, gwLRPIPs); err != nil { - klog.Errorf("Failed to add node %s reserved IPs %v to the join switch IP cache: %s", nodeName, gwLRPIPs, err.Error()) - return nil, err - } - - return gwLRPIPs, nil -} - -// getJoinLRPAddresses check if IPs of gateway logical router port are within the join switch IP range, and return them if true. 
-func (jsIPManager *JoinSwitchIPManager) getJoinLRPAddresses(nodeName string) []*net.IPNet { - // try to get the IPs from the logical router port - gwLRPIPs := []*net.IPNet{} - gwLrpName := types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + nodeName - joinSubnets := jsIPManager.lsm.GetSwitchSubnets(types.OVNJoinSwitch) - ifAddrs, err := util.GetLRPAddrs(jsIPManager.nbClient, gwLrpName) - if err == nil { - for _, ifAddr := range ifAddrs { - for _, subnet := range joinSubnets { - if subnet.Contains(ifAddr.IP) { - gwLRPIPs = append(gwLRPIPs, &net.IPNet{IP: ifAddr.IP, Mask: subnet.Mask}) - break - } - } - } - } - - if len(gwLRPIPs) != len(joinSubnets) { - var errStr string - if len(gwLRPIPs) == 0 { - errStr = "Failed to get IPs" - } else { - errStr = fmt.Sprintf("Invalid IPs %s (possibly not in the range of subnet %s)", - util.JoinIPNets(gwLRPIPs, " "), util.JoinIPNets(joinSubnets, " ")) - } - klog.Warningf("%s for logical router port %s", errStr, gwLrpName) - return []*net.IPNet{} - } - return gwLRPIPs -} - -func (jsIPManager *JoinSwitchIPManager) ReleaseJoinLRPIPs(nodeName string) (err error) { - jsIPManager.lrpIPCacheLock.Lock() - defer jsIPManager.lrpIPCacheLock.Unlock() - gwLRPIPs, ok := jsIPManager.getJoinLRPCacheIPs(nodeName) - if ok { - err = jsIPManager.lsm.ReleaseIPs(types.OVNJoinSwitch, gwLRPIPs) - jsIPManager.delJoinLRPCacheIPs(nodeName) - } - return err -} - // NewL2SwitchManager initializes a new layer2 logical switch manager, // only manage subnet for the one specified switch func NewL2SwitchManager() *LogicalSwitchManager { diff --git a/go-controller/pkg/ovn/master.go b/go-controller/pkg/ovn/master.go index c67c8d658e..0de09966b9 100644 --- a/go-controller/pkg/ovn/master.go +++ b/go-controller/pkg/ovn/master.go @@ -23,7 +23,6 @@ import ( hotypes "github.com/ovn-org/ovn-kubernetes/go-controller/hybrid-overlay/pkg/types" houtil "github.com/ovn-org/ovn-kubernetes/go-controller/hybrid-overlay/pkg/util" - lsm "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/logical_switch_manager" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" ) @@ -189,27 +188,13 @@ func (oc *DefaultNetworkController) SetupMaster(existingNodeNames []string) erro return fmt.Errorf("failed to create logical switch %+v: %v", logicalSwitch, err) } - // Initialize the OVNJoinSwitch switch IP manager - // The OVNJoinSwitch will be allocated IP addresses in the range 100.64.0.0/16 or fd98::/64. - oc.joinSwIPManager, err = lsm.NewJoinLogicalSwitchIPManager(oc.nbClient, logicalSwitch.UUID, existingNodeNames) - if err != nil { - return err - } - - // Allocate IPs for logical router port "GwRouterToJoinSwitchPrefix + OVNClusterRouter". This should always - // allocate the first IPs in the join switch subnets - gwLRPIfAddrs, err := oc.joinSwIPManager.EnsureJoinLRPIPs(types.OVNClusterRouter) - if err != nil { - return fmt.Errorf("failed to allocate join switch IP address connected to %s: %v", types.OVNClusterRouter, err) - } - // Connect the distributed router to OVNJoinSwitch. 
drSwitchPort := types.JoinSwitchToGWRouterPrefix + types.OVNClusterRouter drRouterPort := types.GWRouterToJoinSwitchPrefix + types.OVNClusterRouter - gwLRPMAC := util.IPAddrToHWAddr(gwLRPIfAddrs[0].IP) + gwLRPMAC := util.IPAddrToHWAddr(oc.ovnClusterLRPToJoinIfAddrs[0].IP) gwLRPNetworks := []string{} - for _, gwLRPIfAddr := range gwLRPIfAddrs { + for _, gwLRPIfAddr := range oc.ovnClusterLRPToJoinIfAddrs { gwLRPNetworks = append(gwLRPNetworks, gwLRPIfAddr.String()) } logicalRouterPort := nbdb.LogicalRouterPort{ @@ -320,16 +305,14 @@ func (oc *DefaultNetworkController) syncGatewayLogicalNetwork(node *kapi.Node, l clusterSubnets = append(clusterSubnets, clusterSubnet.CIDR) } - gwLRPIPs, err = oc.joinSwIPManager.EnsureJoinLRPIPs(node.Name) + gwLRPIPs, err = util.ParseNodeGatewayRouterLRPAddrs(node) if err != nil { - return fmt.Errorf("failed to allocate join switch port IP address for node %s: %v", node.Name, err) + return fmt.Errorf("failed to get join switch port IP address for node %s: %v", node.Name, err) } - drLRPIPs, _ := oc.joinSwIPManager.EnsureJoinLRPIPs(types.OVNClusterRouter) - enableGatewayMTU := util.ParseNodeGatewayMTUSupport(node) - err = oc.gatewayInit(node.Name, clusterSubnets, hostSubnets, l3GatewayConfig, oc.SCTPSupport, gwLRPIPs, drLRPIPs, + err = oc.gatewayInit(node.Name, clusterSubnets, hostSubnets, l3GatewayConfig, oc.SCTPSupport, gwLRPIPs, oc.ovnClusterLRPToJoinIfAddrs, enableGatewayMTU) if err != nil { return fmt.Errorf("failed to init shared interface gateway: %v", err) @@ -356,16 +339,17 @@ func (oc *DefaultNetworkController) syncGatewayLogicalNetwork(node *kapi.Node, l func (oc *DefaultNetworkController) ensureNodeLogicalNetwork(node *kapi.Node, hostSubnets []*net.IPNet) error { var hostNetworkPolicyIPs []net.IP - switchName := node.Name for _, hostSubnet := range hostSubnets { mgmtIfAddr := util.GetNodeManagementIfAddr(hostSubnet) hostNetworkPolicyIPs = append(hostNetworkPolicyIPs, mgmtIfAddr.IP) } // also add the join switch IPs for this node - needed in shared gateway mode - lrpIPs, err := oc.joinSwIPManager.EnsureJoinLRPIPs(switchName) + // Note: join switch IPs for each node are generated by cluster manager and + // stored in the node annotation + lrpIPs, err := util.ParseNodeGatewayRouterLRPAddrs(node) if err != nil { - return fmt.Errorf("failed to get join switch port IP address for switch %s: %v", switchName, err) + return fmt.Errorf("failed to get join switch port IP address for node %s: %v", node.Name, err) } for _, lrpIP := range lrpIPs { @@ -417,29 +401,6 @@ func (oc *DefaultNetworkController) addNode(node *kapi.Node) ([]*net.IPNet, erro node.Name, config.IPv4Mode, haveV4, config.IPv6Mode, haveV6) } - gwLRPIPs, err := oc.joinSwIPManager.EnsureJoinLRPIPs(node.Name) - if err != nil { - return nil, fmt.Errorf("failed to allocate join switch port IP address for node %s: %v", node.Name, err) - } - var v4Addr, v6Addr *net.IPNet - for _, ip := range gwLRPIPs { - if ip.IP.To4() != nil { - v4Addr = ip - } else if ip.IP.To16() != nil { - v6Addr = ip - } - } - updatedNodeAnnotation, err := util.CreateNodeGatewayRouterLRPAddrAnnotation(nil, v4Addr, v6Addr) - if err != nil { - return nil, fmt.Errorf("failed to marshal node %q annotation for Gateway LRP IP %v", - node.Name, gwLRPIPs) - } - - err = oc.UpdateNodeAnnotationWithRetry(node.Name, updatedNodeAnnotation) - if err != nil { - return nil, err - } - // delete stale chassis in SBDB if any if err = oc.deleteStaleNodeChassis(node); err != nil { return nil, err @@ -514,10 +475,6 @@ func (oc *DefaultNetworkController) 
deleteNode(nodeName string) error {
 		return fmt.Errorf("failed to clean up node %s gateway: (%v)", nodeName, err)
 	}
 
-	if err := oc.joinSwIPManager.ReleaseJoinLRPIPs(nodeName); err != nil {
-		return fmt.Errorf("failed to clean up GR LRP IPs for node %s: %v", nodeName, err)
-	}
-
 	chassisTemplateVars := make([]*nbdb.ChassisTemplateVar, 0)
 	p := func(item *sbdb.Chassis) bool {
 		if item.Hostname == nodeName {
@@ -621,12 +578,6 @@ func (oc *DefaultNetworkController) syncNodes(kNodes []interface{}) error {
 		}
 		foundNodes.Insert(node.Name)
 		nodes = append(nodes, node)
-
-		// For each existing node, reserve its joinSwitch LRP IPs if they already exist.
-		if _, err := oc.joinSwIPManager.EnsureJoinLRPIPs(node.Name); err != nil {
-			// TODO (flaviof): keep going even if EnsureJoinLRPIPs returned an error. Maybe we should not.
-			klog.Errorf("Failed to get join switch port IP address for node %s: %v", node.Name, err)
-		}
 	}
 
 	defaultNetworkPredicate := func(item *nbdb.LogicalSwitch) bool {
@@ -882,3 +833,31 @@ func (oc *DefaultNetworkController) deleteNodeEvent(node *kapi.Node) error {
 	oc.nodeClusterRouterPortFailed.Delete(node.Name)
 	return nil
 }
+
+// getOVNClusterRouterPortToJoinSwitchIfAddrs returns the IP addresses for the
+// logical router port "GwRouterToJoinSwitchPrefix + OVNClusterRouter" from the
+// config.Gateway.V4JoinSubnet and config.Gateway.V6JoinSubnet. This will
+// always be the first IP from these subnets.
+func (oc *DefaultNetworkController) getOVNClusterRouterPortToJoinSwitchIfAddrs() (gwLRPIPs []*net.IPNet, err error) {
+	joinSubnetsConfig := []string{}
+	if config.IPv4Mode {
+		joinSubnetsConfig = append(joinSubnetsConfig, config.Gateway.V4JoinSubnet)
+	}
+	if config.IPv6Mode {
+		joinSubnetsConfig = append(joinSubnetsConfig, config.Gateway.V6JoinSubnet)
+	}
+	for _, joinSubnetString := range joinSubnetsConfig {
+		_, joinSubnet, err := net.ParseCIDR(joinSubnetString)
+		if err != nil {
+			return nil, fmt.Errorf("error parsing join subnet string %s: %v", joinSubnetString, err)
+		}
+		joinSubnetBaseIP := utilnet.BigForIP(joinSubnet.IP)
+		ipnet := &net.IPNet{
+			IP:   utilnet.AddIPOffset(joinSubnetBaseIP, 1),
+			Mask: joinSubnet.Mask,
+		}
+		gwLRPIPs = append(gwLRPIPs, ipnet)
+	}
+
+	return gwLRPIPs, nil
+}
diff --git a/go-controller/pkg/ovn/master_test.go b/go-controller/pkg/ovn/master_test.go
index 9afacec3df..702124db2a 100644
--- a/go-controller/pkg/ovn/master_test.go
+++ b/go-controller/pkg/ovn/master_test.go
@@ -21,7 +21,6 @@ import (
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube"
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdbops"
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb"
-	lsm "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/logical_switch_manager"
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/retry"
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/sbdb"
 	ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing"
@@ -68,10 +67,22 @@ type tNode struct {
 	DnatSnatIP   string
 }
 
-func (n tNode) k8sNode() v1.Node {
+const (
+	// ovnNodeID is the id (of type integer) of a node. It is set by cluster-manager.
+ ovnNodeID = "k8s.ovn.org/node-id" + + // ovnNodeGRLRPAddr is the CIDR form representation of Gate Router LRP IP address to join switch (i.e: 100.64.0.5/24) + ovnNodeGRLRPAddr = "k8s.ovn.org/node-gateway-router-lrp-ifaddr" +) + +func (n tNode) k8sNode(nodeID string) v1.Node { node := v1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: n.Name, + Annotations: map[string]string{ + ovnNodeID: nodeID, + ovnNodeGRLRPAddr: "{\"ipv4\": \"100.64.0." + nodeID + "/16\"}", + }, }, Status: kapi.NodeStatus{ Addresses: []kapi.NodeAddress{{Type: kapi.NodeExternalIP, Address: n.NodeIP}}, @@ -966,7 +977,7 @@ var _ = ginkgo.Describe("Default network controller operations", func() { datapath, }, } - testNode = node1.k8sNode() + testNode = node1.k8sNode("2") kubeFakeClient = fake.NewSimpleClientset(&v1.NodeList{ Items: []v1.Node{testNode}, @@ -1031,10 +1042,6 @@ var _ = ginkgo.Describe("Default network controller operations", func() { }() oc.SCTPSupport = true - oc.joinSwIPManager, err = lsm.NewJoinLogicalSwitchIPManager(oc.nbClient, expectedNodeSwitch.UUID, []string{node1.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - _, err = oc.joinSwIPManager.EnsureJoinLRPIPs(types.OVNClusterRouter) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) expectedNBDatabaseState = addNodeLogicalFlows(nil, expectedOVNClusterRouter, expectedNodeSwitch, expectedClusterRouterPortGroup, expectedClusterPortGroup, &node1) }) @@ -1475,7 +1482,9 @@ var _ = ginkgo.Describe("Default network controller operations", func() { ObjectMeta: metav1.ObjectMeta{ Name: "newNode", Annotations: map[string]string{ - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"fd02:0:0:2::2895/64\"]}", newNodeSubnet), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"fd02:0:0:2::2895/64\"]}", newNodeSubnet), + "k8s.ovn.org/node-chassis-id": "2", + "k8s.ovn.org/node-gateway-router-lrp-ifaddr": "{\"ipv4\":\"100.64.0.2/16\"}", }, }, } @@ -1655,11 +1664,6 @@ func TestController_syncNodes(t *testing.T) { record.NewFakeRecorder(0), wg) gomega.Expect(err).ToNot(gomega.HaveOccurred()) - controller.joinSwIPManager, err = lsm.NewJoinLogicalSwitchIPManager(nbClient, "", []string{}) - if err != nil { - t.Fatalf("%s: Error creating joinSwIPManager: %v", tt.name, err) - } - err = controller.syncNodes([]interface{}{&testNode}) if err != nil { t.Fatalf("%s: Error on syncNodes: %v", tt.name, err) @@ -1748,10 +1752,6 @@ func TestController_deleteStaleNodeChassis(t *testing.T) { record.NewFakeRecorder(0), wg) gomega.Expect(err).ToNot(gomega.HaveOccurred()) - controller.joinSwIPManager, err = lsm.NewJoinLogicalSwitchIPManager(nbClient, "", []string{}) - if err != nil { - t.Fatalf("%s: Error creating joinSwIPManager: %v", tt.name, err) - } err = controller.deleteStaleNodeChassis(&tt.node) if err != nil { diff --git a/go-controller/pkg/ovn/namespace.go b/go-controller/pkg/ovn/namespace.go index ab14939768..92c4649828 100644 --- a/go-controller/pkg/ovn/namespace.go +++ b/go-controller/pkg/ovn/namespace.go @@ -288,7 +288,7 @@ func (oc *DefaultNetworkController) getAllHostNamespaceAddresses() []net.IP { } // for shared gateway mode we will use LRP IPs to SNAT host network traffic // so add these to the address set. 
- lrpIPs, err := oc.joinSwIPManager.EnsureJoinLRPIPs(node.Name) + lrpIPs, err := util.ParseNodeGatewayRouterLRPAddrs(node) if err != nil { klog.Errorf("Failed to get join switch port IP address for node %s: %v", node.Name, err) } diff --git a/go-controller/pkg/ovn/namespace_test.go b/go-controller/pkg/ovn/namespace_test.go index ec1b0df2fd..bf048da759 100644 --- a/go-controller/pkg/ovn/namespace_test.go +++ b/go-controller/pkg/ovn/namespace_test.go @@ -12,7 +12,6 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" - lsm "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/logical_switch_manager" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/sbdb" ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/libovsdb" @@ -220,7 +219,7 @@ var _ = ginkgo.Describe("OVN Namespace Operations", func() { DnatSnatIP: "169.254.0.1", } // create a test node and annotate it with host subnet - testNode := node1.k8sNode() + testNode := node1.k8sNode("2") hostNetworkNamespace := "test-host-network-ns" config.Kubernetes.HostNetworkNamespace = hostNetworkNamespace @@ -301,10 +300,7 @@ var _ = ginkgo.Describe("OVN Namespace Operations", func() { // be in the addressset yet, depending on if the host subnets annotation of the node exists in the informer cache. The addressset // can only be deterministic when WatchNamespaces() handles this host network namespace. - fakeOvn.controller.joinSwIPManager, _ = lsm.NewJoinLogicalSwitchIPManager(fakeOvn.nbClient, expectedNodeSwitch.UUID, []string{node1.Name}) - _, err = fakeOvn.controller.joinSwIPManager.EnsureJoinLRPIPs(ovntypes.OVNClusterRouter) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gwLRPIPs, err := fakeOvn.controller.joinSwIPManager.EnsureJoinLRPIPs(node1.Name) + gwLRPIPs, err := util.ParseNodeGatewayRouterLRPAddrs(&testNode) gomega.Expect(len(gwLRPIPs) != 0).To(gomega.BeTrue()) err = fakeOvn.controller.WatchNamespaces() diff --git a/go-controller/pkg/ovn/ovn.go b/go-controller/pkg/ovn/ovn.go index de520f6586..1f67f14b7e 100644 --- a/go-controller/pkg/ovn/ovn.go +++ b/go-controller/pkg/ovn/ovn.go @@ -251,9 +251,6 @@ func (oc *DefaultNetworkController) syncNodeGateway(node *kapi.Node, hostSubnets if err := oc.gatewayCleanup(node.Name); err != nil { return fmt.Errorf("error cleaning up gateway for node %s: %v", node.Name, err) } - if err := oc.joinSwIPManager.ReleaseJoinLRPIPs(node.Name); err != nil { - return err - } } else if hostSubnets != nil { var hostAddrs sets.Set[string] if config.Gateway.Mode == config.GatewayModeShared { diff --git a/go-controller/pkg/util/node_annotations.go b/go-controller/pkg/util/node_annotations.go index 1ea5848141..f861b9c7d8 100644 --- a/go-controller/pkg/util/node_annotations.go +++ b/go-controller/pkg/util/node_annotations.go @@ -359,10 +359,10 @@ func SetNodePrimaryIfAddrs(nodeAnnotator kube.Annotator, ifAddrs []*net.IPNet) ( } // CreateNodeGatewayRouterLRPAddrAnnotation sets the IPv4 / IPv6 values of the node's Gatewary Router LRP to join switch. 
-func CreateNodeGatewayRouterLRPAddrAnnotation(nodeAnnotation map[string]string, nodeIPNetv4, - nodeIPNetv6 *net.IPNet) (map[string]string, error) { +func CreateNodeGatewayRouterLRPAddrAnnotation(nodeAnnotation map[string]interface{}, nodeIPNetv4, + nodeIPNetv6 *net.IPNet) (map[string]interface{}, error) { if nodeAnnotation == nil { - nodeAnnotation = map[string]string{} + nodeAnnotation = make(map[string]interface{}) } primaryIfAddrAnnotation := primaryIfAddrAnnotation{} if nodeIPNetv4 != nil { @@ -379,6 +379,10 @@ func CreateNodeGatewayRouterLRPAddrAnnotation(nodeAnnotation map[string]string, return nodeAnnotation, nil } +func NodeGatewayRouterLRPAddrAnnotationChanged(oldNode, newNode *corev1.Node) bool { + return oldNode.Annotations[ovnNodeGRLRPAddr] != newNode.Annotations[ovnNodeGRLRPAddr] +} + const UnlimitedNodeCapacity = math.MaxInt32 type ifAddr struct { @@ -465,6 +469,40 @@ func ParseNodeGatewayRouterLRPAddr(node *kapi.Node) (net.IP, error) { return ip, nil } +// ParseNodeGatewayRouterLRPAddrs returns the IPv4 and/or IPv6 addresses for the node's gateway router port +// stored in the 'ovnNodeGRLRPAddr' annotation +func ParseNodeGatewayRouterLRPAddrs(node *kapi.Node) ([]*net.IPNet, error) { + nodeIfAddrAnnotation, ok := node.Annotations[ovnNodeGRLRPAddr] + if !ok { + return nil, newAnnotationNotSetError("%s annotation not found for node %q", ovnNodeGRLRPAddr, node.Name) + } + nodeIfAddr := primaryIfAddrAnnotation{} + if err := json.Unmarshal([]byte(nodeIfAddrAnnotation), &nodeIfAddr); err != nil { + return nil, fmt.Errorf("failed to unmarshal annotation: %s for node %q, err: %v", ovnNodeGRLRPAddr, node.Name, err) + } + if nodeIfAddr.IPv4 == "" && nodeIfAddr.IPv6 == "" { + return nil, fmt.Errorf("node: %q does not have any IP information set", node.Name) + } + var gwLRPAddrs []*net.IPNet + if nodeIfAddr.IPv4 != "" { + ip, ipNet, err := net.ParseCIDR(nodeIfAddr.IPv4) + if err != nil { + return nil, fmt.Errorf("failed to parse IPv4 address %s from annotation: %s for node %q, err: %v", nodeIfAddr.IPv4, ovnNodeGRLRPAddr, node.Name, err) + } + gwLRPAddrs = append(gwLRPAddrs, &net.IPNet{IP: ip, Mask: ipNet.Mask}) + } + + if nodeIfAddr.IPv6 != "" { + ip, ipNet, err := net.ParseCIDR(nodeIfAddr.IPv6) + if err != nil { + return nil, fmt.Errorf("failed to parse IPv6 address %s from annotation: %s for node %q, err: %v", nodeIfAddr.IPv6, ovnNodeGRLRPAddr, node.Name, err) + } + gwLRPAddrs = append(gwLRPAddrs, &net.IPNet{IP: ip, Mask: ipNet.Mask}) + } + + return gwLRPAddrs, nil +} + // ParseCloudEgressIPConfig returns the cloud's information concerning the node's primary network interface func ParseCloudEgressIPConfig(node *kapi.Node) (*ParsedNodeEgressIPConfiguration, error) { egressIPConfigAnnotation, ok := node.Annotations[cloudEgressIPConfigAnnotationKey] From cfd0e04646ac2aaa05237dc107bc8cbacd19d157 Mon Sep 17 00:00:00 2001 From: Numan Siddique Date: Wed, 22 Jun 2022 17:43:42 -0400 Subject: [PATCH 04/90] network-controller-manager: Manage the nodes which belong to local zone. This patch adds the concept of zones where a zone can have one or more nodes grouped in it. To start with this patch only supports one global zone (all the nodes belong to the global zone) and upcoming patches will add the support for multiple zones. Each zone requires - OVN Northbound and Southbound database cluster (either a single node or a raft cluster) - ovnkube network-controller-manager which connects to its local zone OVN db cluster and creates the OVN logical resources in the Northbound database. 
network-controller-manager can be run on a single node or multiple nodes with active/passive HA (etcd leader election).
- ovn-northd which connects to the local zone OVN dbs.
- ovnkube-node and ovn-controller on each node of the zone.

network-controller-manager creates the node logical switch and other Northbound
resources for a node only if the node belongs to its zone. Nodes in other zones
are ignored. It also creates pod resources only if the pod is scheduled on a
local zone node.

network-controller-manager gets its zone name from the NB_Global.name of the
OVN Northbound database. If the name is empty then the default value "global"
is used.

Signed-off-by: Numan Siddique
---
 .../pkg/ovn/base_network_controller.go        |  38 +++++
 .../pkg/ovn/base_network_controller_pods.go   |  23 ++-
 .../pkg/ovn/default_network_controller.go     | 112 ++++++++----
 go-controller/pkg/ovn/egressgw_test.go        | 160 +++++++++++++++++-
 go-controller/pkg/ovn/master.go               |  61 +++++--
 go-controller/pkg/ovn/multicast_test.go       |  10 ++
 go-controller/pkg/ovn/multipolicy_test.go     |   5 +
 go-controller/pkg/ovn/namespace_test.go       |   5 +
 go-controller/pkg/ovn/ovn.go                  |  26 +++
 go-controller/pkg/ovn/ovn_test.go             |  84 ++++++++-
 go-controller/pkg/ovn/pods.go                 |  10 +-
 go-controller/pkg/ovn/pods_test.go            | 132 ++++++++++++++-
 go-controller/pkg/ovn/policy_test.go          |   5 +
 go-controller/pkg/util/util.go                |  17 ++
 14 files changed, 633 insertions(+), 55 deletions(-)

diff --git a/go-controller/pkg/ovn/base_network_controller.go b/go-controller/pkg/ovn/base_network_controller.go
index 822418c99c..ee0fedad7d 100644
--- a/go-controller/pkg/ovn/base_network_controller.go
+++ b/go-controller/pkg/ovn/base_network_controller.go
@@ -63,6 +63,9 @@ type CommonNetworkControllerInfo struct {
 	svcTemplateSupport bool
 	// Is ACL logging enabled while configuring meters?
 	aclLoggingEnabled bool
+
+	// Northbound database zone name to which this Controller is connected - aka the local zone
+	zone string
 }
 
 // BaseNetworkController structure holds per-network fields and network specific configuration
@@ -133,6 +136,11 @@ type BaseNetworkController struct {
 	stopChan chan struct{}
 	// waitGroup per-Controller
 	wg *sync.WaitGroup
+
+	// List of nodes which belong to the local zone (stored as a sync map)
+	// If the map is nil, it means the controller is not tracking the node events
+	// and all the nodes are considered as local zone nodes.
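+	// Keys are node names and the stored value is simply `true`; a sync.Map is
+	// used since (presumably) node and pod event handlers touch it concurrently.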
+ localZoneNodes *sync.Map } // BaseSecondaryNetworkController structure holds per-network fields and network specific @@ -147,6 +155,10 @@ type BaseSecondaryNetworkController struct { func NewCommonNetworkControllerInfo(client clientset.Interface, kube *kube.KubeOVN, wf *factory.WatchFactory, recorder record.EventRecorder, nbClient libovsdbclient.Client, sbClient libovsdbclient.Client, podRecorder *metrics.PodRecorder, SCTPSupport, multicastSupport, svcTemplateSupport, aclLoggingEnabled bool) (*CommonNetworkControllerInfo, error) { + zone, err := util.GetNBZone(nbClient) + if err != nil { + return nil, fmt.Errorf("error getting NB zone name : err - %w", err) + } return &CommonNetworkControllerInfo{ client: client, kube: kube, @@ -159,6 +171,7 @@ func NewCommonNetworkControllerInfo(client clientset.Interface, kube *kube.KubeO multicastSupport: multicastSupport, svcTemplateSupport: svcTemplateSupport, aclLoggingEnabled: aclLoggingEnabled, + zone: zone, }, nil } @@ -726,3 +739,28 @@ func (bnc *BaseNetworkController) getClusterPortGroupName(base string) string { } return base } + +// GetLocalZoneNodes returns the list of local zone nodes +// A node is considered a local zone node if the zone name +// set in the node's annotation matches with the zone name +// set in the OVN Northbound database (to which this controller is connected to). +func (bnc *BaseNetworkController) GetLocalZoneNodes() ([]*kapi.Node, error) { + nodes, err := bnc.watchFactory.GetNodes() + if err != nil { + return nil, fmt.Errorf("failed to get nodes: %v", err) + } + + var zoneNodes []*kapi.Node + for _, n := range nodes { + if bnc.isLocalZoneNode(n) { + zoneNodes = append(zoneNodes, n) + } + } + + return zoneNodes, nil +} + +// isLocalZoneNode returns true if the node is part of the local zone. +func (bnc *BaseNetworkController) isLocalZoneNode(node *kapi.Node) bool { + return util.GetNodeZone(node) == bnc.zone +} diff --git a/go-controller/pkg/ovn/base_network_controller_pods.go b/go-controller/pkg/ovn/base_network_controller_pods.go index 54ebdf321a..0880e2ff0d 100644 --- a/go-controller/pkg/ovn/base_network_controller_pods.go +++ b/go-controller/pkg/ovn/base_network_controller_pods.go @@ -80,8 +80,8 @@ func (bnc *BaseNetworkController) deleteStaleLogicalSwitchPorts(expectedLogicalP // get all switches that Pod logical port would be reside on. topoType := bnc.TopologyType() if !bnc.IsSecondary() || topoType == ovntypes.Layer3Topology { - // for default network and layer3 topology type networks, get all node switches. - nodes, err := bnc.watchFactory.GetNodes() + // for default network and layer3 topology type networks, get all local zone node switches + nodes, err := bnc.GetLocalZoneNodes() if err != nil { return fmt.Errorf("failed to get nodes: %v", err) } @@ -855,6 +855,25 @@ func (bnc *BaseNetworkController) deletePodFromNamespace(ns string, podIfAddrs [ return ops, nil } +// isPodScheduledinLocalZone returns true if +// - bnc.localZoneNodes map is nil or +// - if the pod.Spec.NodeName is in the bnc.localZoneNodes map +// +// false otherwise. 
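+//
+// In effect (a sketch of the decision table implemented below):
+//
+//	bnc.localZoneNodes == nil                 -> true  (zone tracking disabled)
+//	pod not scheduled yet                     -> false
+//	pod.Spec.NodeName found in localZoneNodes -> true
+//	otherwise                                 -> false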
+func (bnc *BaseNetworkController) isPodScheduledinLocalZone(pod *kapi.Pod) bool { + isLocalZonePod := true + + if bnc.localZoneNodes != nil { + if util.PodScheduled(pod) { + _, isLocalZonePod = bnc.localZoneNodes.Load(pod.Spec.NodeName) + } else { + isLocalZonePod = false + } + } + + return isLocalZonePod +} + // WatchPods starts the watching of the Pod resource and calls back the appropriate handler logic func (bnc *BaseNetworkController) WatchPods() error { if bnc.podHandler != nil { diff --git a/go-controller/pkg/ovn/default_network_controller.go b/go-controller/pkg/ovn/default_network_controller.go index add5ea5ec8..cae05bf200 100644 --- a/go-controller/pkg/ovn/default_network_controller.go +++ b/go-controller/pkg/ovn/default_network_controller.go @@ -173,6 +173,7 @@ func newDefaultNetworkControllerCommon(cnci *CommonNetworkControllerInfo, podSelectorAddressSets: syncmap.NewSyncMap[*PodSelectorAddressSet](), stopChan: defaultStopChan, wg: defaultWg, + localZoneNodes: &sync.Map{}, }, externalGWCache: make(map[ktypes.NamespacedName]*externalRouteInfo), exGWCacheMutex: sync.RWMutex{}, @@ -687,27 +688,33 @@ func (h *defaultNetworkControllerEventHandler) AddResource(obj interface{}, from if !ok { return fmt.Errorf("could not cast %T object to *kapi.Node", obj) } - var nodeParams *nodeSyncs - if fromRetryLoop { - _, nodeSync := h.oc.addNodeFailed.Load(node.Name) - _, clusterRtrSync := h.oc.nodeClusterRouterPortFailed.Load(node.Name) - _, mgmtSync := h.oc.mgmtPortFailed.Load(node.Name) - _, gwSync := h.oc.gatewaysFailed.Load(node.Name) - _, hoSync := h.oc.hybridOverlayFailed.Load(node.Name) - nodeParams = &nodeSyncs{ - nodeSync, - clusterRtrSync, - mgmtSync, - gwSync, - hoSync} - } else { - nodeParams = &nodeSyncs{true, true, true, true, config.HybridOverlay.Enabled} - } + if h.oc.isLocalZoneNode(node) { + var nodeParams *nodeSyncs + if fromRetryLoop { + _, nodeSync := h.oc.addNodeFailed.Load(node.Name) + _, clusterRtrSync := h.oc.nodeClusterRouterPortFailed.Load(node.Name) + _, mgmtSync := h.oc.mgmtPortFailed.Load(node.Name) + _, gwSync := h.oc.gatewaysFailed.Load(node.Name) + _, hoSync := h.oc.hybridOverlayFailed.Load(node.Name) + nodeParams = &nodeSyncs{ + nodeSync, + clusterRtrSync, + mgmtSync, + gwSync, + hoSync} + } else { + nodeParams = &nodeSyncs{true, true, true, true, config.HybridOverlay.Enabled} + } - if err = h.oc.addUpdateNodeEvent(node, nodeParams); err != nil { - klog.Infof("Node add failed for %s, will try again later: %v", - node.Name, err) - return err + if err = h.oc.addUpdateLocalNodeEvent(node, nodeParams); err != nil { + klog.Infof("Node add failed for %s, will try again later: %v", + node.Name, err) + return err + } + } else { + if err = h.oc.addUpdateRemoteNodeEvent(node); err != nil { + return err + } } case factory.EgressFirewallType: @@ -808,19 +815,57 @@ func (h *defaultNetworkControllerEventHandler) UpdateResource(oldObj, newObj int if !ok { return fmt.Errorf("could not cast oldObj of type %T to *kapi.Node", oldObj) } - // determine what actually changed in this update - _, nodeSync := h.oc.addNodeFailed.Load(newNode.Name) - _, failed := h.oc.nodeClusterRouterPortFailed.Load(newNode.Name) - clusterRtrSync := failed || nodeChassisChanged(oldNode, newNode) || nodeSubnetChanged(oldNode, newNode) - _, failed = h.oc.mgmtPortFailed.Load(newNode.Name) - mgmtSync := failed || macAddressChanged(oldNode, newNode) || nodeSubnetChanged(oldNode, newNode) - _, failed = h.oc.gatewaysFailed.Load(newNode.Name) - gwSync := (failed || gatewayChanged(oldNode, newNode) || - 
nodeSubnetChanged(oldNode, newNode) || hostAddressesChanged(oldNode, newNode) ||
-				nodeGatewayMTUSupportChanged(oldNode, newNode))
-			_, hoSync := h.oc.hybridOverlayFailed.Load(newNode.Name)
-
-			return h.oc.addUpdateNodeEvent(newNode, &nodeSyncs{nodeSync, clusterRtrSync, mgmtSync, gwSync, hoSync})
+
+			// +--------------------+-------------------+-------------------------------------------------+
+			// |      oldNode       |      newNode      |                     Action                      |
+			// |--------------------+-------------------+-------------------------------------------------+
+			// |                    |                   | Node is remote.                                 |
+			// |       local        |      remote       | Call addUpdateRemoteNodeEvent()                 |
+			// |                    |                   |                                                 |
+			// |--------------------+-------------------+-------------------------------------------------+
+			// |                    |                   | Node is local                                   |
+			// |       local        |      local        | Call addUpdateLocalNodeEvent()                  |
+			// |                    |                   |                                                 |
+			// |--------------------+-------------------+-------------------------------------------------+
+			// |                    |                   | Node is local                                   |
+			// |       remote       |      local        | Call addUpdateLocalNodeEvent(full sync)         |
+			// |                    |                   |                                                 |
+			// |--------------------+-------------------+-------------------------------------------------+
+			// |                    |                   | Node is remote                                  |
+			// |       remote       |      remote       | Call addUpdateRemoteNodeEvent()                 |
+			// |                    |                   |                                                 |
+			// +--------------------+-------------------+-------------------------------------------------+
+			if h.oc.isLocalZoneNode(newNode) {
+				var nodeSyncsParam *nodeSyncs
+				if h.oc.isLocalZoneNode(oldNode) {
+					// determine what actually changed in this update
+					_, nodeSync := h.oc.addNodeFailed.Load(newNode.Name)
+					_, failed := h.oc.nodeClusterRouterPortFailed.Load(newNode.Name)
+					clusterRtrSync := failed || nodeChassisChanged(oldNode, newNode) || nodeSubnetChanged(oldNode, newNode)
+					_, failed = h.oc.mgmtPortFailed.Load(newNode.Name)
+					mgmtSync := failed || macAddressChanged(oldNode, newNode) || nodeSubnetChanged(oldNode, newNode)
+					_, failed = h.oc.gatewaysFailed.Load(newNode.Name)
+					gwSync := (failed || gatewayChanged(oldNode, newNode) ||
+						nodeSubnetChanged(oldNode, newNode) || hostAddressesChanged(oldNode, newNode) ||
+						nodeGatewayMTUSupportChanged(oldNode, newNode))
+					_, hoSync := h.oc.hybridOverlayFailed.Load(newNode.Name)
+					nodeSyncsParam = &nodeSyncs{
+						nodeSync,
+						clusterRtrSync,
+						mgmtSync,
+						gwSync,
+						hoSync}
+				} else {
+					klog.Infof("Node %s moved from the remote zone %s to local zone %s.",
+						newNode.Name, util.GetNodeZone(oldNode), util.GetNodeZone(newNode))
+					// The node is now a local zone node. Trigger a full node sync.
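+					// The five literals below follow the nodeSyncs declaration order:
+					// node, cluster router port, management port, gateway and hybrid
+					// overlay sync (names inferred from the failure caches above; only
+					// syncHo is visible in this patch).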
+ nodeSyncsParam = &nodeSyncs{true, true, true, true, true} + } + + return h.oc.addUpdateLocalNodeEvent(newNode, nodeSyncsParam) + } else { + return h.oc.addUpdateRemoteNodeEvent(newNode) + } case factory.EgressIPType: oldEIP := oldObj.(*egressipv1.EgressIP) @@ -943,7 +988,6 @@ func (h *defaultNetworkControllerEventHandler) DeleteResource(obj, cachedObj int if cachedObj != nil { portInfo = cachedObj.(*lpInfo) } - h.oc.logicalPortCache.remove(pod, ovntypes.DefaultNetworkName) return h.oc.removePod(pod, portInfo) case factory.PolicyType: diff --git a/go-controller/pkg/ovn/egressgw_test.go b/go-controller/pkg/ovn/egressgw_test.go index c843e36ea7..6337d8c4d3 100644 --- a/go-controller/pkg/ovn/egressgw_test.go +++ b/go-controller/pkg/ovn/egressgw_test.go @@ -10,6 +10,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" + ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" libovsdbtest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/libovsdb" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" @@ -92,6 +93,11 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { namespaceT, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode("node1", "192.168.126.202/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{ *newPod(t.namespace, t.podName, t.nodeName, t.podIP), @@ -225,6 +231,11 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode("node1", "192.168.126.202/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{ *newPod(t.namespace, t.podName, t.nodeName, t.podIP), @@ -366,6 +377,11 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { namespaceT, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode("node1", "192.168.126.202/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{ *newPod(t.namespace, t.podName, t.nodeName, t.podIP), @@ -526,6 +542,11 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { namespaceT, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode("node1", "192.168.126.202/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{ *newPod(t.namespace, t.podName, t.nodeName, t.podIP), @@ -682,6 +703,11 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { namespaceT, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode("node1", "192.168.126.202/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{ *newPod(t.namespace, t.podName, t.nodeName, t.podIP), @@ -794,6 +820,11 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { namespaceT, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode("node1", "192.168.126.202/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{ *newPod(t.namespace, t.podName, t.nodeName, t.podIP), @@ -980,6 +1011,10 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { UUID: "node1", Name: "node1", }, + &nbdb.LogicalSwitch{ + UUID: "node2", + Name: "node2", + }, &nbdb.LogicalRouter{ UUID: "GR_node1-UUID", Name: "GR_node1", @@ -991,6 +1026,11 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { namespaceT, namespaceX, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode("node2", "192.168.126.51/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{ gwPod, @@ -998,6 +1038,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, ) t.populateLogicalSwitchCache(fakeOvn, 
getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + fakeOvn.controller.lsManager.AddSwitch("node2", getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node2"), []*net.IPNet{ovntest.MustParseIPNet("10.128.2.0/24")}) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1036,6 +1077,10 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "node1", Ports: []string{"lsp1"}, }, + &nbdb.LogicalSwitch{ + UUID: "node2", + Name: "node2", + }, &nbdb.LogicalRouterStaticRoute{ UUID: "static-route-1-UUID", IPPrefix: "10.128.1.3/32", @@ -1072,6 +1117,10 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "node1", Ports: []string{"lsp1"}, }, + &nbdb.LogicalSwitch{ + UUID: "node2", + Name: "node2", + }, &nbdb.BFD{ UUID: bfd1NamedUUID, DstIP: "9.0.0.1", @@ -1123,6 +1172,10 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { UUID: "node1", Name: "node1", }, + &nbdb.LogicalSwitch{ + UUID: "node2", + Name: "node2", + }, &nbdb.LogicalRouter{ UUID: "GR_node1-UUID", Name: "GR_node1", @@ -1134,6 +1187,11 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { namespaceT, namespaceX, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode("node2", "192.168.126.51/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{ *newPod(t.namespace, t.podName, t.nodeName, t.podIP), @@ -1141,6 +1199,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, ) t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + fakeOvn.controller.lsManager.AddSwitch("node2", getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node2"), []*net.IPNet{ovntest.MustParseIPNet("10.128.2.0/24")}) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1178,6 +1237,10 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "node1", Ports: []string{"lsp1"}, }, + &nbdb.LogicalSwitch{ + UUID: "node2", + Name: "node2", + }, &nbdb.LogicalRouterStaticRoute{ UUID: "static-route-1-UUID", IPPrefix: "10.128.1.3/32", @@ -1214,6 +1277,10 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "node1", Ports: []string{"lsp1"}, }, + &nbdb.LogicalSwitch{ + UUID: "node2", + Name: "node2", + }, &nbdb.BFD{ UUID: bfd1NamedUUID, DstIP: "9.0.0.1", @@ -1273,6 +1340,10 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { UUID: "node1", Name: "node1", }, + &nbdb.LogicalSwitch{ + UUID: "node2", + Name: "node2", + }, &nbdb.LogicalRouter{ UUID: "GR_node1-UUID", Name: "GR_node1", @@ -1284,6 +1355,11 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { namespaceT, namespaceX, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode("node2", "192.168.126.51/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{ gwPod, @@ -1291,6 +1367,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, ) t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + fakeOvn.controller.lsManager.AddSwitch("node2", getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node2"), []*net.IPNet{ovntest.MustParseIPNet("10.128.2.0/24")}) injectNode(fakeOvn) err = fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1329,6 +1406,10 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "node1", Ports: []string{"lsp1"}, }, + &nbdb.LogicalSwitch{ + UUID: "node2", + Name: "node2", + }, 
&nbdb.LogicalRouterStaticRoute{ UUID: "static-route-1-UUID", IPPrefix: "10.128.1.3/32", @@ -1365,6 +1446,10 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "node1", Ports: []string{"lsp1"}, }, + &nbdb.LogicalSwitch{ + UUID: "node2", + Name: "node2", + }, &nbdb.BFD{ UUID: bfd1NamedUUID, DstIP: "11.0.0.1", @@ -1419,6 +1504,10 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { UUID: "node1", Name: "node1", }, + &nbdb.LogicalSwitch{ + UUID: "node2", + Name: "node2", + }, &nbdb.LogicalRouter{ UUID: "GR_node1-UUID", Name: "GR_node1", @@ -1430,6 +1519,12 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { namespaceT, namespaceX, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode("node1", "192.168.126.202/24"), + *newNode("node2", "192.168.126.50/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{ *newPod(t.namespace, t.podName, t.nodeName, t.podIP), @@ -1437,6 +1532,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, ) t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + fakeOvn.controller.lsManager.AddSwitch("node2", getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node2"), []*net.IPNet{ovntest.MustParseIPNet("10.128.2.0/24")}) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1483,6 +1579,10 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "node1", Ports: []string{"lsp1"}, }, + &nbdb.LogicalSwitch{ + UUID: "node2", + Name: "node2", + }, &nbdb.LogicalRouterStaticRoute{ UUID: "static-route-1-UUID", IPPrefix: "10.128.1.3/32", @@ -1519,6 +1619,10 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "node1", Ports: []string{"lsp1"}, }, + &nbdb.LogicalSwitch{ + UUID: "node2", + Name: "node2", + }, &nbdb.LogicalRouter{ UUID: "GR_node1-UUID", Name: "GR_node1", @@ -1546,6 +1650,10 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "node1", Ports: []string{"lsp1"}, }, + &nbdb.LogicalSwitch{ + UUID: "node2", + Name: "node2", + }, &nbdb.BFD{ UUID: bfd1NamedUUID, DstIP: "9.0.0.1", @@ -1588,6 +1696,10 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "node1", Ports: []string{"lsp1"}, }, + &nbdb.LogicalSwitch{ + UUID: "node2", + Name: "node2", + }, &nbdb.LogicalRouter{ UUID: "GR_node1-UUID", Name: "GR_node1", @@ -1626,6 +1738,10 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { UUID: "node1", Name: "node1", }, + &nbdb.LogicalSwitch{ + UUID: "node2", + Name: "node2", + }, &nbdb.LogicalRouter{ UUID: "GR_node1-UUID", Name: "GR_node1", @@ -1637,6 +1753,12 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { namespaceT, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode("node1", "192.168.126.202/24"), + *newNode("node2", "192.168.126.50/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{ *newPod(t.namespace, t.podName, t.nodeName, t.podIP), @@ -1644,7 +1766,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, ) t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) - + fakeOvn.controller.lsManager.AddSwitch("node2", getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node2"), []*net.IPNet{ovntest.MustParseIPNet("10.128.2.0/24")}) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1674,6 +1796,10 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: 
"node1", Ports: []string{"lsp1"}, }, + &nbdb.LogicalSwitch{ + UUID: "node2", + Name: "node2", + }, &nbdb.BFD{ UUID: bfd1NamedUUID, DstIP: "9.0.0.1", @@ -1745,6 +1871,10 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { UUID: "node1", Name: "node1", }, + &nbdb.LogicalSwitch{ + UUID: "node2", + Name: "node2", + }, &nbdb.LogicalRouter{ UUID: "GR_node1-UUID", Name: "GR_node1", @@ -1756,6 +1886,12 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { namespaceT, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode("node1", "192.168.126.202/24"), + *newNode("node2", "192.168.126.50/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{ *newPod(t.namespace, t.podName, t.nodeName, t.podIP), @@ -1763,7 +1899,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, ) t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) - + fakeOvn.controller.lsManager.AddSwitch("node2", getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node2"), []*net.IPNet{ovntest.MustParseIPNet("10.128.2.0/24")}) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1793,6 +1929,10 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "node1", Ports: []string{"lsp1"}, }, + &nbdb.LogicalSwitch{ + UUID: "node2", + Name: "node2", + }, &nbdb.BFD{ UUID: bfd1NamedUUID, DstIP: "10.0.0.1", @@ -1887,6 +2027,11 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { namespaceT, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode("node1", "192.168.126.202/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{ *newPod(t.namespace, t.podName, t.nodeName, t.podIP), @@ -2047,6 +2192,11 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { namespaceT, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode("node1", "192.168.126.202/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{ *newPod(t.namespace, t.podName, t.nodeName, t.podIP), @@ -2473,6 +2623,11 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { namespaceT, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode("node1", "192.168.126.202/24"), + }, + }, &v1.PodList{ Items: pod, }, @@ -2555,4 +2710,5 @@ func injectNode(fakeOvn *FakeOVN) { }, } gomega.ExpectWithOffset(1, fakeOvn.controller.watchFactory.NodeInformer().GetStore().Add(node)).To(gomega.Succeed()) + fakeOvn.controller.localZoneNodes.Store(node.Name, true) } diff --git a/go-controller/pkg/ovn/master.go b/go-controller/pkg/ovn/master.go index 0de09966b9..25e21bbad1 100644 --- a/go-controller/pkg/ovn/master.go +++ b/go-controller/pkg/ovn/master.go @@ -550,12 +550,17 @@ func (oc *DefaultNetworkController) syncNodesPeriodic() { return } - nodes := make([]*kapi.Node, 0, len(kNodes.Items)) + localZoneKNodes := make([]*kapi.Node, 0, len(kNodes.Items)) + remoteZoneKNodes := make([]*kapi.Node, 0, len(kNodes.Items)) for i := range kNodes.Items { - nodes = append(nodes, &kNodes.Items[i]) + if oc.isLocalZoneNode(&kNodes.Items[i]) { + localZoneKNodes = append(localZoneKNodes, &kNodes.Items[i]) + } else { + remoteZoneKNodes = append(remoteZoneKNodes, &kNodes.Items[i]) + } } - if err := oc.syncChassis(nodes); err != nil { + if err := oc.syncChassis(localZoneKNodes, remoteZoneKNodes); err != nil { klog.Errorf("Failed to sync chassis: error: %v", err) } } @@ -566,7 +571,8 @@ func (oc *DefaultNetworkController) syncNodesPeriodic() { // do not want to delete. 
func (oc *DefaultNetworkController) syncNodes(kNodes []interface{}) error { foundNodes := sets.New[string]() - nodes := make([]*kapi.Node, 0, len(kNodes)) + localZoneKNodes := make([]*kapi.Node, 0, len(kNodes)) + remoteZoneKNodes := make([]*kapi.Node, 0, len(kNodes)) for _, tmp := range kNodes { node, ok := tmp.(*kapi.Node) if !ok { @@ -576,8 +582,15 @@ func (oc *DefaultNetworkController) syncNodes(kNodes []interface{}) error { if config.HybridOverlay.Enabled && houtil.IsHybridOverlayNode(node) { continue } - foundNodes.Insert(node.Name) - nodes = append(nodes, node) + + // Add the node to the foundNodes only if it belongs to the local zone. + if oc.isLocalZoneNode(node) { + foundNodes.Insert(node.Name) + oc.localZoneNodes.Store(node.Name, true) + localZoneKNodes = append(localZoneKNodes, node) + } else { + remoteZoneKNodes = append(remoteZoneKNodes, node) + } } defaultNetworkPredicate := func(item *nbdb.LogicalSwitch) bool { @@ -596,7 +609,7 @@ func (oc *DefaultNetworkController) syncNodes(kNodes []interface{}) error { } } - if err := oc.syncChassis(nodes); err != nil { + if err := oc.syncChassis(localZoneKNodes, remoteZoneKNodes); err != nil { return fmt.Errorf("failed to sync chassis: error: %v", err) } return nil @@ -604,7 +617,7 @@ func (oc *DefaultNetworkController) syncNodes(kNodes []interface{}) error { // Cleanup stale chassis and chassis template variables with no // corresponding nodes. -func (oc *DefaultNetworkController) syncChassis(nodes []*kapi.Node) error { +func (oc *DefaultNetworkController) syncChassis(localZoneNodes, remoteZoneNodes []*kapi.Node) error { chassisList, err := libovsdbops.ListChassis(oc.sbClient) if err != nil { return fmt.Errorf("failed to get chassis list: error: %v", err) @@ -650,7 +663,7 @@ func (oc *DefaultNetworkController) syncChassis(nodes []*kapi.Node) error { // Delete existing nodes from the chassis map. // Also delete existing templateVars from the template map. - for _, node := range nodes { + for _, node := range localZoneNodes { if chassis, ok := chassisHostNameMap[node.Name]; ok { delete(chassisNameMap, chassis.Name) delete(chassisHostNameMap, chassis.Hostname) @@ -658,6 +671,15 @@ func (oc *DefaultNetworkController) syncChassis(nodes []*kapi.Node) error { } } + // Delete existing remote zone nodes from the chassis map, but not from the templateVars + // as we need to cleanup chassisTemplateVars for the remote zone nodes + for _, node := range remoteZoneNodes { + if chassis, ok := chassisHostNameMap[node.Name]; ok { + delete(chassisNameMap, chassis.Name) + delete(chassisHostNameMap, chassis.Hostname) + } + } + staleChassis := make([]*sbdb.Chassis, 0, len(chassisHostNameMap)) for _, chassis := range chassisNameMap { staleChassis = append(staleChassis, chassis) @@ -689,11 +711,13 @@ type nodeSyncs struct { syncHo bool } -func (oc *DefaultNetworkController) addUpdateNodeEvent(node *kapi.Node, nSyncs *nodeSyncs) error { +func (oc *DefaultNetworkController) addUpdateLocalNodeEvent(node *kapi.Node, nSyncs *nodeSyncs) error { var hostSubnets []*net.IPNet var errs []error var err error + _, _ = oc.localZoneNodes.LoadOrStore(node.Name, true) + if noHostSubnet := util.NoHostSubnet(node); noHostSubnet { err := oc.lsManager.AddNoHostSubnetSwitch(node.Name) if err != nil { @@ -805,6 +829,21 @@ func (oc *DefaultNetworkController) addUpdateNodeEvent(node *kapi.Node, nSyncs * return err } +func (oc *DefaultNetworkController) addUpdateRemoteNodeEvent(node *kapi.Node) error { + // Check if the remote node is present in the local zone nodes. 
If it's present + // it means it moved from this controller's zone to another remote zone. Clean up the node + from the local zone cache. + _, present := oc.localZoneNodes.Load(node.Name) + + if present { + klog.Infof("Node %q moved from the local zone %s to a remote zone %s. Deleting it locally", node.Name, oc.zone, util.GetNodeZone(node)) + if err := oc.deleteNodeEvent(node); err != nil { + return fmt.Errorf("error deleting the remote node %s, err : %w", node.Name, err) + } + } + return nil +} + func (oc *DefaultNetworkController) deleteNodeEvent(node *kapi.Node) error { klog.V(5).Infof("Deleting Node %q. Removing the node from "+ "various caches", node.Name) @@ -822,6 +861,7 @@ func (oc *DefaultNetworkController) deleteNodeEvent(node *kapi.Node) error { return err } } + if err := oc.deleteNode(node.Name); err != nil { return err } @@ -831,6 +871,7 @@ func (oc *DefaultNetworkController) deleteNodeEvent(node *kapi.Node) error { oc.mgmtPortFailed.Delete(node.Name) oc.gatewaysFailed.Delete(node.Name) oc.nodeClusterRouterPortFailed.Delete(node.Name) + oc.localZoneNodes.Delete(node.Name) return nil } diff --git a/go-controller/pkg/ovn/multicast_test.go b/go-controller/pkg/ovn/multicast_test.go index cfd2dba670..485521b57c 100644 --- a/go-controller/pkg/ovn/multicast_test.go +++ b/go-controller/pkg/ovn/multicast_test.go @@ -583,6 +583,11 @@ var _ = ginkgo.Describe("OVN Multicast with IP Address Family", func() { namespace1, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode("node1", "192.168.126.202/24"), + }, + }, &v1.PodList{ Items: pods, }, @@ -686,6 +691,11 @@ var _ = ginkgo.Describe("OVN Multicast with IP Address Family", func() { namespace1, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode("node1", "192.168.126.202/24"), + }, + }, ) setIpMode(m) diff --git a/go-controller/pkg/ovn/multipolicy_test.go b/go-controller/pkg/ovn/multipolicy_test.go index 7bd044c2de..8134868baf 100644 --- a/go-controller/pkg/ovn/multipolicy_test.go +++ b/go-controller/pkg/ovn/multipolicy_test.go @@ -325,6 +325,11 @@ var _ = ginkgo.Describe("OVN MultiNetworkPolicy Operations", func() { &v1.PodList{ Items: podsList, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode("node1", "192.168.126.202/24"), + }, + }, &knet.NetworkPolicyList{ Items: networkPolicies, }, diff --git a/go-controller/pkg/ovn/namespace_test.go b/go-controller/pkg/ovn/namespace_test.go index bf048da759..059a76db41 100644 --- a/go-controller/pkg/ovn/namespace_test.go +++ b/go-controller/pkg/ovn/namespace_test.go @@ -158,6 +158,11 @@ var _ = ginkgo.Describe("OVN Namespace Operations", func() { namespaceT, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode("node1", "192.168.126.202/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{ *tPod, diff --git a/go-controller/pkg/ovn/ovn.go b/go-controller/pkg/ovn/ovn.go index 1f67f14b7e..d8f31db93f 100644 --- a/go-controller/pkg/ovn/ovn.go +++ b/go-controller/pkg/ovn/ovn.go @@ -126,6 +126,18 @@ func (oc *DefaultNetworkController) ensurePod(oldPod, pod *kapi.Pod, addPort boo if !util.PodScheduled(pod) { return nil } + + if oc.isPodScheduledinLocalZone(pod) { + return oc.ensureLocalZonePod(oldPod, pod, addPort) + } + + // TODO (numans): For remote zone pods add the pod ips to the namespace address set + return nil +} + +// ensureLocalZonePod tries to set up a local zone pod. It returns nil on success and error on failure; failure +// indicates the pod set up should be retried later.
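addUpdateRemoteNodeEvent above leans on localZoneNodes, a sync.Map used as a zone-membership cache: a Load hit for a node now reported as remote means the node migrated zones and its local state must be torn down. A runnable sketch of that lifecycle, assuming nothing beyond the standard library:

package main

import (
	"fmt"
	"sync"
)

func main() {
	var localZoneNodes sync.Map

	// Node event handlers store membership when a node is seen as local...
	localZoneNodes.Store("node1", true)

	// ...and a later remote-zone event for the same name signals a zone move.
	if _, present := localZoneNodes.Load("node1"); present {
		fmt.Println("node1 moved to a remote zone; deleting local resources")
		localZoneNodes.Delete("node1")
	}
}
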
+func (oc *DefaultNetworkController) ensureLocalZonePod(oldPod, pod *kapi.Pod, addPort bool) error { if config.Metrics.EnableScaleMetrics { start := time.Now() defer func() { @@ -166,6 +178,20 @@ func (oc *DefaultNetworkController) ensurePod(oldPod, pod *kapi.Pod, addPort boo // removePod tried to tear down a pod. It returns nil on success and error on failure; // failure indicates the pod tear down should be retried later. func (oc *DefaultNetworkController) removePod(pod *kapi.Pod, portInfo *lpInfo) error { + if oc.isPodScheduledinLocalZone(pod) { + return oc.removeLocalZonePod(pod, portInfo) + } + + // TODO (numans) When we add the remote pod ips to the namespace address set, remove them + // when the remote pod is deleted. + return nil +} + +// removeLocalZonePod tries to tear down a local zone pod. It returns nil on success and error on failure; +// failure indicates the pod tear down should be retried later. +func (oc *DefaultNetworkController) removeLocalZonePod(pod *kapi.Pod, portInfo *lpInfo) error { + oc.logicalPortCache.remove(pod, ovntypes.DefaultNetworkName) + if config.Metrics.EnableScaleMetrics { start := time.Now() defer func() { diff --git a/go-controller/pkg/ovn/ovn_test.go b/go-controller/pkg/ovn/ovn_test.go index f4515eeccc..1f9a548f45 100644 --- a/go-controller/pkg/ovn/ovn_test.go +++ b/go-controller/pkg/ovn/ovn_test.go @@ -4,9 +4,10 @@ import ( "context" "encoding/json" "fmt" - "github.com/onsi/ginkgo" "sync" + "github.com/onsi/ginkgo" + mnpapi "github.com/k8snetworkplumbingwg/multi-networkpolicy/pkg/apis/k8s.cni.cncf.io/v1beta1" mnpfake "github.com/k8snetworkplumbingwg/multi-networkpolicy/pkg/client/clientset/versioned/fake" nettypes "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" @@ -23,6 +24,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" libovsdbtest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/libovsdb" @@ -163,6 +165,13 @@ func (o *FakeOVN) init(nadList []*nettypes.NetworkAttachmentDefinition) { err := o.NewSecondaryNetworkController(nad) gomega.Expect(err).NotTo(gomega.HaveOccurred()) } + + existingNodes, err := o.controller.kube.GetNodes() + if err == nil { + for _, node := range existingNodes.Items { + o.controller.localZoneNodes.Store(node.Name, true) + } + } } func resetNBClient(ctx context.Context, nbClient libovsdbclient.Client) { @@ -193,6 +202,17 @@ func NewOvnController(ovnClient *util.OVNMasterClientset, wf *factory.WatchFacto } podRecorder := metrics.NewPodRecorder() + + nbZoneFailed := false + // Try to get the NBZone. If there is an error, create NB_Global record. + // Otherwise NewCommonNetworkControllerInfo() will return error since it + // calls util.GetNBZone(). 
+ _, err := util.GetNBZone(libovsdbOvnNBClient) + if err != nil { + nbZoneFailed = true + err = createTestNBGlobal(libovsdbOvnNBClient, "global") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + } cnci, err := NewCommonNetworkControllerInfo( ovnClient.KubeClient, &kube.KubeOVN{ @@ -215,7 +235,49 @@ func NewOvnController(ovnClient *util.OVNMasterClientset, wf *factory.WatchFacto return nil, err } - return newDefaultNetworkControllerCommon(cnci, stopChan, wg, addressSetFactory) + dnc, err := newDefaultNetworkControllerCommon(cnci, stopChan, wg, addressSetFactory) + + if nbZoneFailed { + // Delete the NBGlobal row as this function created it. Otherwise many tests would fail while + // checking the expectedData in the NBDB. + err = deleteTestNBGlobal(libovsdbOvnNBClient, "global") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + } + + return dnc, err +} + +func createTestNBGlobal(nbClient libovsdbclient.Client, zone string) error { + nbGlobal := &nbdb.NBGlobal{Name: zone} + ops, err := nbClient.Create(nbGlobal) + if err != nil { + return err + } + + _, err = nbClient.Transact(context.Background(), ops...) + if err != nil { + return err + } + + return nil +} + +func deleteTestNBGlobal(nbClient libovsdbclient.Client, zone string) error { + p := func(nbGlobal *nbdb.NBGlobal) bool { + return true + } + + ops, err := nbClient.WhereCache(p).Delete() + if err != nil { + return err + } + + _, err = nbClient.Transact(context.Background(), ops...) + if err != nil { + return err + } + + return nil } func newNetworkAttachmentDefinition(namespace, name string, netconf ovncnitypes.NetConf) (*nettypes.NetworkAttachmentDefinition, error) { @@ -245,6 +307,17 @@ func (o *FakeOVN) NewSecondaryNetworkController(netattachdef *nettypes.NetworkAt topoType := netConfInfo.TopologyType() ocInfo, ok = o.secondaryControllers[netName] if !ok { + nbZoneFailed := false + // Try to get the NBZone. If there is an error, create NB_Global record. + // Otherwise NewCommonNetworkControllerInfo() will return error since it + // calls util.GetNBZone(). + _, err := util.GetNBZone(o.nbClient) + if err != nil { + nbZoneFailed = true + err = createTestNBGlobal(o.nbClient, "global") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + } + podRecorder := metrics.NewPodRecorder() cnci, err := NewCommonNetworkControllerInfo( o.fakeClient.KubeClient, @@ -284,6 +357,13 @@ func (o *FakeOVN) NewSecondaryNetworkController(netattachdef *nettypes.NetworkAt } ocInfo = secondaryControllerInfo{bnc: secondaryController, asf: asf} o.secondaryControllers[netName] = ocInfo + + if nbZoneFailed { + // Delete the NBGlobal row as this function created it. Otherwise many tests would fail while + // checking the expectedData in the NBDB. 
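The test helpers here bracket controller construction: create an NB_Global row if the zone lookup fails, then delete it afterwards so expected-database checks stay clean. The fallback the lookup itself applies (matching util.GetNBZone added in this series) is simply that an unset NB_Global name means the default zone. A trivial sketch of that rule, with the "global" default assumed from types.OvnDefaultZone:

package main

import "fmt"

// ovnDefaultZone mirrors what this series calls types.OvnDefaultZone.
const ovnDefaultZone = "global"

// zoneFromNBGlobalName applies GetNBZone's fallback: an empty NB_Global
// name means the database belongs to the default zone.
func zoneFromNBGlobalName(name string) string {
	if name == "" {
		return ovnDefaultZone
	}
	return name
}

func main() {
	fmt.Println(zoneFromNBGlobalName(""))       // global
	fmt.Println(zoneFromNBGlobalName("zone-a")) // zone-a
}
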
+ err = deleteTestNBGlobal(o.nbClient, "global") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + } } else { secondaryController = ocInfo.bnc } diff --git a/go-controller/pkg/ovn/pods.go b/go-controller/pkg/ovn/pods.go index 5492e1542d..71fab9590f 100644 --- a/go-controller/pkg/ovn/pods.go +++ b/go-controller/pkg/ovn/pods.go @@ -31,6 +31,11 @@ func (oc *DefaultNetworkController) syncPods(pods []interface{}) error { if !ok { return fmt.Errorf("spurious object in syncPods: %v", podInterface) } + + if !oc.isPodScheduledinLocalZone(pod) { + continue + } + annotations, err := util.UnmarshalPodAnnotation(pod.Annotations, ovntypes.DefaultNetworkName) if err != nil { continue @@ -71,7 +76,7 @@ func (oc *DefaultNetworkController) syncPods(pods []interface{}) error { // allocate all previously annoted hybridOverlay Distributed Router IP addresses. Allocation needs to happen here // before a Pod Add event can be processed and be allocated a previously assigned hybridOverlay Distributed Router IP address. // we do not support manually setting the hybrid overlay DRIP address - nodes, err := oc.watchFactory.GetNodes() + nodes, err := oc.GetLocalZoneNodes() if err != nil { return fmt.Errorf("failed to get nodes: %v", err) } @@ -100,9 +105,6 @@ func (oc *DefaultNetworkController) deleteLogicalPort(pod *kapi.Pod, portInfo *l if pod.Spec.HostNetwork { return nil } - if !util.PodScheduled(pod) { - return nil - } pInfo, err := oc.deletePodLogicalPort(pod, portInfo, ovntypes.DefaultNetworkName) if err != nil { diff --git a/go-controller/pkg/ovn/pods_test.go b/go-controller/pkg/ovn/pods_test.go index 9082c981bb..d185a63651 100644 --- a/go-controller/pkg/ovn/pods_test.go +++ b/go-controller/pkg/ovn/pods_test.go @@ -101,6 +101,29 @@ func newPod(namespace, name, node, podIP string) *v1.Pod { } } +func newNode(nodeName, nodeIPv4 string) *v1.Node { + return &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: nodeName, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", nodeIPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + }, + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } +} + type testPod struct { portUUID string nodeName string @@ -258,6 +281,11 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { initialDB libovsdbtest.TestSetup ) + const ( + node1Name = "node1" + node2Name = "node2" + ) + ginkgo.BeforeEach(func() { // Restore global default values before each testcase config.PrepareTestConfig() @@ -305,6 +333,11 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { namespaceT, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode(node1Name, "192.168.126.202/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{ *newPod(t.namespace, t.podName, t.nodeName, t.podIP), @@ -375,6 +408,11 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { namespaceT, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode(node1Name, "192.168.126.202/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{}, }, @@ -425,6 +463,11 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { namespaceT, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode(node1Name, "192.168.126.202/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{}, }, @@ -532,6 +575,11 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { namespaceT, }, }, + &v1.NodeList{ + Items: []v1.Node{ + 
*newNode("node1", "192.168.126.202/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{}, }, @@ -701,6 +749,11 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { namespaceT, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode(node1Name, "192.168.126.202/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{*myPod}, }, @@ -744,6 +797,11 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { namespaceT, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode(node1Name, "192.168.126.202/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{}, }, @@ -814,6 +872,11 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { namespace1, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode(node1Name, "192.168.126.202/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{*pod}, }, @@ -885,6 +948,11 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { namespace1, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode(node1Name, "192.168.126.202/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{*pod}, }, @@ -959,6 +1027,11 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { namespace1, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode(node1Name, "192.168.126.202/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{*pod}, }, @@ -1066,6 +1139,11 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { namespace1, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode(node1Name, "192.168.126.202/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{*pod}, }, @@ -1174,6 +1252,11 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { namespace1, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode(node1Name, "192.168.126.202/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{*pod}, }, @@ -1252,6 +1335,11 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { namespaceT, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode(node1Name, "192.168.126.202/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{ *pod, @@ -1294,6 +1382,11 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { namespaceT, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode(node1Name, "192.168.126.202/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{ *newPod(t.namespace, t.podName, t.nodeName, t.podIP), @@ -1350,6 +1443,11 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { namespaceT, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode(node1Name, "192.168.126.202/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{ *newPod(t.namespace, t.podName, t.nodeName, t.podIP), @@ -1405,7 +1503,13 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { ) podJSON := t.getAnnotationsJson() - fakeOvn.startWithDBSetup(initialDB) + fakeOvn.startWithDBSetup(initialDB, + &v1.NodeList{ + Items: []v1.Node{ + *newNode(node1Name, "192.168.126.202/24"), + }, + }, + ) t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) err := fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1454,6 +1558,11 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { namespaceT, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode(node1Name, "192.168.126.202/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{ *newPod(t.namespace, t.podName, t.nodeName, t.podIP), @@ -1509,6 +1618,11 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { namespaceT, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode(node1Name, "192.168.126.202/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{ *pod, @@ -1727,6 +1841,12 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { namespaceT, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode(node1Name, 
"192.168.126.202/24"), + *newNode(node2Name, "192.168.126.51/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{ *pod1, @@ -1886,6 +2006,11 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { namespaceT, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode(node1Name, "192.168.126.202/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{ *pod1, @@ -1937,6 +2062,11 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { namespaceT, }, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode(node1Name, "192.168.126.202/24"), + }, + }, &v1.PodList{ Items: []v1.Pod{ *p, diff --git a/go-controller/pkg/ovn/policy_test.go b/go-controller/pkg/ovn/policy_test.go index cee78276a3..cf3ad60656 100644 --- a/go-controller/pkg/ovn/policy_test.go +++ b/go-controller/pkg/ovn/policy_test.go @@ -665,6 +665,11 @@ var _ = ginkgo.Describe("OVN NetworkPolicy Operations", func() { &v1.NamespaceList{ Items: namespaces, }, + &v1.NodeList{ + Items: []v1.Node{ + *newNode(nodeName, "192.168.126.202/24"), + }, + }, &v1.PodList{ Items: podsList, }, diff --git a/go-controller/pkg/util/util.go b/go-controller/pkg/util/util.go index 3d43b8d58e..b67f647283 100644 --- a/go-controller/pkg/util/util.go +++ b/go-controller/pkg/util/util.go @@ -371,3 +371,20 @@ func UpdateNodeSwitchExcludeIPs(nbClient libovsdbclient.Client, nodeName string, return nil } + +// GetNBZone returns the zone name configured in the OVN Northbound database. +// If the zone name is not configured, it returns the default zone name - "global" +// It retuns error if there is no NBGlobal row. +func GetNBZone(nbClient libovsdbclient.Client) (string, error) { + nbGlobal := &nbdb.NBGlobal{} + nbGlobal, err := libovsdbops.GetNBGlobal(nbClient, nbGlobal) + if err != nil { + return "", fmt.Errorf("error in getting the NBGlobal row from Northbound db : err - %w", err) + } + + if nbGlobal.Name == "" { + return types.OvnDefaultZone, nil + } + + return nbGlobal.Name, nil +} From f40f40aaa349990b191aaf925c5075a84f335de7 Mon Sep 17 00:00:00 2001 From: Numan Siddique Date: Tue, 7 Feb 2023 11:24:53 -0500 Subject: [PATCH 05/90] cluster-manager: Generate Transit switch port ips. In order to interconnect multiple zones (i.e for the traffic flow from one zone to another), network controller manager needs to create a Transit switch in OVN Northbound database and create a logical port for each node. In this commit, cluster manager generates transit switch port ips for each node if interconnect feature is enabled (a feature flag "--enable-interconnect" is added for this) and stores it in the node annotation - "k8s.ovn.org/ovn-node-transit-switch-port-ifaddr". It derives the node transit switch port ips for each node from the config - ClusterManager.V4TransitSwitchSubnet/V6TransitSwitchSubnet (newly added by this patch) and the node id. In the next commit, network controller manager will create the Transit switch and make use of these generated transit switch port ips. Note: This commit still doesn't support multiple zones. Upcoming commits will add the support. 
Signed-off-by: Numan Siddique --- .../pkg/clustermanager/clustermanager_test.go | 268 ++++++++++++++++++ .../clustermanager/zone_cluster_controller.go | 52 ++++ go-controller/pkg/config/config.go | 83 ++++++ go-controller/pkg/config/config_test.go | 6 + go-controller/pkg/util/node_annotations.go | 65 ++++- 5 files changed, 459 insertions(+), 15 deletions(-) diff --git a/go-controller/pkg/clustermanager/clustermanager_test.go b/go-controller/pkg/clustermanager/clustermanager_test.go index 89e26d3d12..1addfe3f79 100644 --- a/go-controller/pkg/clustermanager/clustermanager_test.go +++ b/go-controller/pkg/clustermanager/clustermanager_test.go @@ -29,6 +29,9 @@ const ( // ovnNodeGRLRPAddrAnnotaton is the node annotation name used to store the node gateway router port ips. ovnNodeGRLRPAddrAnnotaton = "k8s.ovn.org/node-gateway-router-lrp-ifaddr" + + // ovnTransitSwitchPortAddrAnnotation is the node annotation name to store the transit switch port ips. + ovnTransitSwitchPortAddrAnnotation = "k8s.ovn.org/node-transit-switch-port-ifaddr" ) var _ = ginkgo.Describe("Cluster Manager", func() { @@ -1150,4 +1153,269 @@ var _ = ginkgo.Describe("Cluster Manager", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) }) + + ginkgo.Context("Transit switch port IP allocations", func() { + ginkgo.It("Interconnect enabled", func() { + app.Action = func(ctx *cli.Context) error { + nodes := []v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node2", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node3", + }, + }, + } + kubeFakeClient := fake.NewSimpleClientset(&v1.NodeList{ + Items: nodes, + }) + fakeClient := &util.OVNClusterManagerClientset{ + KubeClient: kubeFakeClient, + } + + _, err := config.InitConfig(ctx, nil, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + config.Kubernetes.HostNetworkNamespace = "" + + f, err = factory.NewClusterManagerWatchFactory(fakeClient) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = f.Start() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + clusterManager, err := NewClusterManager(fakeClient, f, "identity", wg, nil) + gomega.Expect(clusterManager).NotTo(gomega.BeNil()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = clusterManager.Start(ctx.Context) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + defer clusterManager.Stop() + + // Check that cluster manager has allocated id transit switch port ips for each node + for _, n := range nodes { + gomega.Eventually(func() error { + updatedNode, err := fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), n.Name, metav1.GetOptions{}) + if err != nil { + return err + } + + _, ok := updatedNode.Annotations[ovnTransitSwitchPortAddrAnnotation] + if !ok { + return fmt.Errorf("expected node annotation for node %s to have transit switch port ips allocated", n.Name) + } + + transitSwitchIps, err := util.ParseNodeTransitSwitchPortAddrs(updatedNode) + if err != nil { + return fmt.Errorf("error parsing transit switch ip annotations for the node %s", n.Name) + } + + if len(transitSwitchIps) < 1 { + return fmt.Errorf("transit switch ips for node %s not allocated", n.Name) + } + + return nil + }).ShouldNot(gomega.HaveOccurred()) + } + + return nil + } + + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR, + "--enable-interconnect", + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("Interconnect enabled - clear the transit switch port ips and check", func() { + app.Action = 
func(ctx *cli.Context) error { + nodes := []v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node2", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node3", + }, + }, + } + kubeFakeClient := fake.NewSimpleClientset(&v1.NodeList{ + Items: nodes, + }) + fakeClient := &util.OVNClusterManagerClientset{ + KubeClient: kubeFakeClient, + } + + _, err := config.InitConfig(ctx, nil, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + config.Kubernetes.HostNetworkNamespace = "" + + f, err = factory.NewClusterManagerWatchFactory(fakeClient) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = f.Start() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + clusterManager, err := NewClusterManager(fakeClient, f, "identity", wg, nil) + gomega.Expect(clusterManager).NotTo(gomega.BeNil()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = clusterManager.Start(ctx.Context) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + defer clusterManager.Stop() + + // Check that cluster manager has allocated id transit switch port ips for each node + for _, n := range nodes { + gomega.Eventually(func() error { + updatedNode, err := fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), n.Name, metav1.GetOptions{}) + if err != nil { + return err + } + + _, ok := updatedNode.Annotations[ovnTransitSwitchPortAddrAnnotation] + if !ok { + return fmt.Errorf("expected node annotation for node %s to have transit switch port ips allocated", n.Name) + } + + transitSwitchIps, err := util.ParseNodeTransitSwitchPortAddrs(updatedNode) + if err != nil { + return fmt.Errorf("error parsing transit switch ip annotations for the node %s", n.Name) + } + + if len(transitSwitchIps) < 1 { + return fmt.Errorf("transit switch ips for node %s not allocated", n.Name) + } + + return nil + }).ShouldNot(gomega.HaveOccurred()) + } + + // Clear the transit switch port ip annotation from node 1. 
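These checks rely on Gomega's Eventually with an error-returning closure: the closure is polled until it returns nil, or the timeout fails the test. The same idiom in a self-contained, hypothetical test (test name and counter are illustrative):

package main

import (
	"fmt"
	"testing"

	"github.com/onsi/gomega"
)

// TestEventuallyConverges shows the polling idiom used in the tests above:
// Eventually retries the closure, and ShouldNot(HaveOccurred()) passes
// once the closure returns a nil error.
func TestEventuallyConverges(t *testing.T) {
	g := gomega.NewWithT(t)
	attempts := 0
	g.Eventually(func() error {
		attempts++
		if attempts < 3 {
			return fmt.Errorf("annotation not restored yet")
		}
		return nil
	}).ShouldNot(gomega.HaveOccurred())
}
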
+ node1, _ := fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), "node1", metav1.GetOptions{}) + nodeAnnotations := node1.Annotations + nodeAnnotator := kube.NewNodeAnnotator(&kube.Kube{kubeFakeClient}, "node1") + for k, v := range nodeAnnotations { + nodeAnnotator.Set(k, v) + } + node1TransitSwitchIps := node1.Annotations[ovnTransitSwitchPortAddrAnnotation] + nodeAnnotator.Delete(ovnTransitSwitchPortAddrAnnotation) + err = nodeAnnotator.Run() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(func() error { + updatedNode, err := fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), "node1", metav1.GetOptions{}) + if err != nil { + return err + } + + updatedNode1TransitSwitchIps, ok := updatedNode.Annotations[ovnTransitSwitchPortAddrAnnotation] + if !ok { + return fmt.Errorf("expected node annotation for node node1 to have transit switch port ips allocated") + } + + transitSwitchIps, err := util.ParseNodeTransitSwitchPortAddrs(updatedNode) + if err != nil { + return fmt.Errorf("error parsing transit switch ip annotations for the node node1") + } + + if len(transitSwitchIps) < 1 { + return fmt.Errorf("transit switch ips for node node1 not allocated") + } + gomega.Expect(node1TransitSwitchIps).To(gomega.Equal(updatedNode1TransitSwitchIps)) + return nil + }).ShouldNot(gomega.HaveOccurred()) + + return nil + } + + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR, + "--enable-interconnect", + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("Interconnect disabled", func() { + app.Action = func(ctx *cli.Context) error { + nodes := []v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node2", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node3", + }, + }, + } + kubeFakeClient := fake.NewSimpleClientset(&v1.NodeList{ + Items: nodes, + }) + fakeClient := &util.OVNClusterManagerClientset{ + KubeClient: kubeFakeClient, + } + + _, err := config.InitConfig(ctx, nil, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + config.Kubernetes.HostNetworkNamespace = "" + + f, err = factory.NewClusterManagerWatchFactory(fakeClient) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = f.Start() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + clusterManager, err := NewClusterManager(fakeClient, f, "identity", wg, nil) + gomega.Expect(clusterManager).NotTo(gomega.BeNil()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = clusterManager.Start(ctx.Context) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + defer clusterManager.Stop() + + // Check that cluster manager has allocated id transit switch port ips for each node + for _, n := range nodes { + gomega.Eventually(func() error { + updatedNode, err := fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), n.Name, metav1.GetOptions{}) + if err != nil { + return err + } + + _, ok := updatedNode.Annotations[ovnTransitSwitchPortAddrAnnotation] + if ok { + return fmt.Errorf("not expected node annotation for node %s to have transit switch port ips allocated", n.Name) + } + + return nil + }).ShouldNot(gomega.HaveOccurred()) + } + + return nil + } + + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR, + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) + }) diff --git a/go-controller/pkg/clustermanager/zone_cluster_controller.go b/go-controller/pkg/clustermanager/zone_cluster_controller.go index 7de1c27791..0b6fa09650 100644 --- 
a/go-controller/pkg/clustermanager/zone_cluster_controller.go +++ b/go-controller/pkg/clustermanager/zone_cluster_controller.go @@ -42,6 +42,10 @@ type zoneClusterController struct { // node gateway router port IP generators (connecting to the join switch) nodeGWRouterLRPIPv4Generator *ipGenerator nodeGWRouterLRPIPv6Generator *ipGenerator + + // Transit switch IP generator. This is required if EnableInterconnect feature is enabled. + transitSwitchIPv4Generator *ipGenerator + transitSwitchIPv6Generator *ipGenerator } func newZoneClusterController(ovnClient *util.OVNClusterManagerClientset, wf *factory.WatchFactory) (*zoneClusterController, error) { @@ -80,6 +84,24 @@ func newZoneClusterController(ovnClient *util.OVNClusterManagerClientset, wf *fa } } + var transitSwitchIPv4Generator, transitSwitchIPv6Generator *ipGenerator + + if config.OVNKubernetesFeature.EnableInterconnect { + if config.IPv4Mode { + transitSwitchIPv4Generator, err = newIPGenerator(config.ClusterManager.V4TransitSwitchSubnet) + if err != nil { + return nil, fmt.Errorf("error creating IP Generator for v4 transit switch subnet %s: %w", config.ClusterManager.V4TransitSwitchSubnet, err) + } + } + + if config.IPv6Mode { + transitSwitchIPv6Generator, err = newIPGenerator(config.ClusterManager.V6TransitSwitchSubnet) + if err != nil { + return nil, fmt.Errorf("error creating IP Generator for v6 transit switch subnet %s: %w", config.ClusterManager.V6TransitSwitchSubnet, err) + } + } + } + zcc := &zoneClusterController{ kube: kube, watchFactory: wf, @@ -88,6 +110,8 @@ func newZoneClusterController(ovnClient *util.OVNClusterManagerClientset, wf *fa nodeIDAllocator: nodeIDAllocator, nodeGWRouterLRPIPv4Generator: nodeGWRouterLRPIPv4Generator, nodeGWRouterLRPIPv6Generator: nodeGWRouterLRPIPv6Generator, + transitSwitchIPv4Generator: transitSwitchIPv4Generator, + transitSwitchIPv6Generator: transitSwitchIPv6Generator, } zcc.initRetryFramework() @@ -163,6 +187,31 @@ func (zcc *zoneClusterController) handleAddUpdateNodeEvent(node *corev1.Node) er node.Name, err) } + if config.OVNKubernetesFeature.EnableInterconnect { + v4Addr = nil + v6Addr = nil + if config.IPv4Mode { + v4Addr, err = zcc.transitSwitchIPv4Generator.GenerateIP(allocatedNodeID) + if err != nil { + return fmt.Errorf("failed to generate transit switch port IPv4 address for node %s : err - %w", node.Name, err) + } + } + + if config.IPv6Mode { + v6Addr, err = zcc.transitSwitchIPv6Generator.GenerateIP(allocatedNodeID) + if err != nil { + return fmt.Errorf("failed to generate transit switch port IPv6 address for node %s : err - %w", node.Name, err) + } + } + + nodeAnnotations, err = util.CreateNodeTransitSwitchPortAddrAnnotation(nodeAnnotations, v4Addr, v6Addr) + if err != nil { + return fmt.Errorf("failed to marshal node %q annotation for transit switch port IPs, err : %w", + node.Name, err) + } + } + // TODO (numans) If EnableInterconnect is false, clear the NodeTransitSwitchPortAddrAnnotation if set.
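The nodeAnnotations map assembled above is marshalled into a small JSON object keyed by IP family, matching the primaryIfAddrAnnotation shape these patches use. A hedged sketch of the payload (field names per the annotation formats shown in this series; omitempty and the concrete values are assumptions):

package main

import (
	"encoding/json"
	"fmt"
)

// ifAddr mirrors the {"ipv4": ..., "ipv6": ...} annotation payload.
type ifAddr struct {
	IPv4 string `json:"ipv4,omitempty"`
	IPv6 string `json:"ipv6,omitempty"`
}

func main() {
	payload, _ := json.Marshal(ifAddr{IPv4: "168.254.0.3/16", IPv6: "fd97::3/64"})
	annotations := map[string]string{
		"k8s.ovn.org/node-transit-switch-port-ifaddr": string(payload),
	}
	fmt.Println(annotations["k8s.ovn.org/node-transit-switch-port-ifaddr"])
	// {"ipv4":"168.254.0.3/16","ipv6":"fd97::3/64"}
}
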
+ return zcc.kube.SetAnnotationsOnNode(node.Name, nodeAnnotations) } @@ -346,6 +395,9 @@ func (h *zoneClusterControllerEventHandler) AreResourcesEqual(obj1, obj2 interfa if util.NodeGatewayRouterLRPAddrAnnotationChanged(node1, node2) { return false, nil } + if util.NodeTransitSwitchPortAddrAnnotationChanged(node1, node2) { + return false, nil + } return true, nil } diff --git a/go-controller/pkg/config/config.go b/go-controller/pkg/config/config.go index 7ac4e6b389..b95453ec45 100644 --- a/go-controller/pkg/config/config.go +++ b/go-controller/pkg/config/config.go @@ -166,6 +166,11 @@ var ( OvnKubeNode = OvnKubeNodeConfig{ Mode: types.NodeModeFull, } + + ClusterManager = ClusterManagerConfig{ + V4TransitSwitchSubnet: "168.254.0.0/16", + V6TransitSwitchSubnet: "fd97::/64", + } ) const ( @@ -349,6 +354,7 @@ type OVNKubernetesFeatureConfig struct { EnableMultiNetwork bool `gcfg:"enable-multi-network"` EnableMultiNetworkPolicy bool `gcfg:"enable-multi-networkpolicy"` EnableStatelessNetPol bool `gcfg:"enable-stateless-netpol"` + EnableInterconnect bool `gcfg:"enable-interconnect"` } // GatewayMode holds the node gateway mode @@ -446,6 +452,14 @@ type OvnKubeNodeConfig struct { DisableOVNIfaceIdVer bool `gcfg:"disable-ovn-iface-id-ver"` } +// ClusterManagerConfig holds configuration for ovnkube-cluster-manager +type ClusterManagerConfig struct { + // V4TransitSwitchSubnet to be used in the cluster for interconnecting multiple zones + V4TransitSwitchSubnet string `gcfg:"v4-transit-switch-subnet"` + // V6TransitSwitchSubnet to be used in the cluster for interconnecting multiple zones + V6TransitSwitchSubnet string `gcfg:"v6-transit-switch-subnet"` +} + // OvnDBScheme describes the OVN database connection transport method type OvnDBScheme string @@ -475,6 +489,7 @@ type config struct { ClusterMgrHA HAConfig HybridOverlay HybridOverlayConfig OvnKubeNode OvnKubeNodeConfig + ClusterManager ClusterManagerConfig } var ( @@ -493,6 +508,8 @@ var ( savedClusterMgrHA HAConfig savedHybridOverlay HybridOverlayConfig savedOvnKubeNode OvnKubeNodeConfig + savedClusterManager ClusterManagerConfig + // legacy service-cluster-ip-range CLI option serviceClusterIPRange string // legacy cluster-subnet CLI option @@ -519,6 +536,7 @@ func init() { savedMasterHA = MasterHA savedHybridOverlay = HybridOverlay savedOvnKubeNode = OvnKubeNode + savedClusterManager = ClusterManager cli.VersionPrinter = func(c *cli.Context) { fmt.Printf("Version: %s\n", Version) fmt.Printf("Git commit: %s\n", Commit) @@ -548,6 +566,7 @@ func PrepareTestConfig() error { MasterHA = savedMasterHA HybridOverlay = savedHybridOverlay OvnKubeNode = savedOvnKubeNode + ClusterManager = savedClusterManager if err := completeConfig(); err != nil { return err @@ -928,6 +947,12 @@ var OVNK8sFeatureFlags = []cli.Flag{ Destination: &cliConfig.OVNKubernetesFeature.EnableStatelessNetPol, Value: OVNKubernetesFeature.EnableStatelessNetPol, }, + &cli.BoolFlag{ + Name: "enable-interconnect", + Usage: "Configure to enable interconnecting multiple zones.", + Destination: &cliConfig.OVNKubernetesFeature.EnableInterconnect, + Value: OVNKubernetesFeature.EnableInterconnect, + }, } // K8sFlags capture Kubernetes-related options @@ -1346,6 +1371,22 @@ var OvnKubeNodeFlags = []cli.Flag{ }, } +// ClusterManagerFlags captures ovnkube-cluster-manager specific configurations +var ClusterManagerFlags = []cli.Flag{ + &cli.StringFlag{ + Name: "cluster-manager-v4-transit-switch-subnet", + Usage: "The v4 transit switch subnet used for assigning transit switch IPv4 addresses for 
interconnect", + Destination: &cliConfig.ClusterManager.V4TransitSwitchSubnet, + Value: ClusterManager.V4TransitSwitchSubnet, + }, + &cli.StringFlag{ + Name: "cluster-manager-v6-transit-switch-subnet", + Usage: "The v6 transit switch subnet used for assigning transit switch IPv6 addresses for interconnect", + Destination: &cliConfig.ClusterManager.V6TransitSwitchSubnet, + Value: ClusterManager.V6TransitSwitchSubnet, + }, +} + // Flags are general command-line flags. Apps should add these flags to their // own urfave/cli flags and call InitConfig() early in the application. var Flags []cli.Flag @@ -1367,6 +1408,7 @@ func GetFlags(customFlags []cli.Flag) []cli.Flag { flags = append(flags, MonitoringFlags...) flags = append(flags, IPFIXFlags...) flags = append(flags, OvnKubeNodeFlags...) + flags = append(flags, ClusterManagerFlags...) flags = append(flags, customFlags...) return flags } @@ -1797,6 +1839,37 @@ func completeHybridOverlayConfig(allSubnets *configSubnets) error { return nil } +func buildClusterManagerConfig(ctx *cli.Context, cli, file *config) error { + // Copy config file values over default values + if err := overrideFields(&ClusterManager, &file.ClusterManager, &savedClusterManager); err != nil { + return err + } + + // And CLI overrides over config file and default values + if err := overrideFields(&ClusterManager, &cli.ClusterManager, &savedClusterManager); err != nil { + return err + } + + return nil +} + +// completeClusterManagerConfig completes the ClusterManager config by parsing raw values +// into their final form. +func completeClusterManagerConfig() error { + // Validate v4 and v6 transit switch subnets + v4IP, _, err := net.ParseCIDR(ClusterManager.V4TransitSwitchSubnet) + if err != nil || utilnet.IsIPv6(v4IP) { + return fmt.Errorf("invalid transit switch v4 subnet specified, subnet: %s: error: %v", ClusterManager.V4TransitSwitchSubnet, err) + } + + v6IP, _, err := net.ParseCIDR(ClusterManager.V6TransitSwitchSubnet) + if err != nil || !utilnet.IsIPv6(v6IP) { + return fmt.Errorf("invalid transit switch v4 join subnet specified, subnet: %s: error: %v", ClusterManager.V6TransitSwitchSubnet, err) + } + + return nil +} + func buildDefaultConfig(cli, file *config) error { if err := overrideFields(&Default, &file.Default, &savedDefault); err != nil { return err @@ -1891,6 +1964,7 @@ func initConfigWithPath(ctx *cli.Context, exec kexec.Interface, saPath string, d MasterHA: savedMasterHA, HybridOverlay: savedHybridOverlay, OvnKubeNode: savedOvnKubeNode, + ClusterManager: savedClusterManager, } configFile, configFileIsDefault = getConfigFilePath(ctx) @@ -2008,6 +2082,10 @@ func initConfigWithPath(ctx *cli.Context, exec kexec.Interface, saPath string, d return "", err } + if err = buildClusterManagerConfig(ctx, &cliConfig, &cfg); err != nil { + return "", err + } + tmpAuth, err := buildOvnAuth(exec, true, &cliConfig.OvnNorth, &cfg.OvnNorth, defaults.OvnNorthAddress) if err != nil { return "", err @@ -2035,6 +2113,7 @@ func initConfigWithPath(ctx *cli.Context, exec kexec.Interface, saPath string, d klog.V(5).Infof("OVN South config: %+v", OvnSouth) klog.V(5).Infof("Hybrid Overlay config: %+v", HybridOverlay) klog.V(5).Infof("Ovnkube Node config: %+v", OvnKubeNode) + klog.V(5).Infof("Ovnkube Cluster Manager config: %+v", ClusterManager) return retConfigFile, nil } @@ -2058,6 +2137,10 @@ func completeConfig() error { return err } + if err := completeClusterManagerConfig(); err != nil { + return err + } + if err := allSubnets.checkForOverlaps(); err != nil { return err } diff 
--git a/go-controller/pkg/config/config_test.go b/go-controller/pkg/config/config_test.go index 1845334439..cb3b33384c 100644 --- a/go-controller/pkg/config/config_test.go +++ b/go-controller/pkg/config/config_test.go @@ -216,6 +216,7 @@ egressip-reachability-total-timeout=3 egressip-node-healthcheck-port=1234 enable-multi-network=false enable-multi-networkpolicy=false +enable-interconnect=false ` var newData string @@ -312,6 +313,7 @@ var _ = Describe("Config Operations", func() { gomega.Expect(OVNKubernetesFeature.EgressIPNodeHealthCheckPort).To(gomega.Equal(0)) gomega.Expect(OVNKubernetesFeature.EnableMultiNetwork).To(gomega.BeFalse()) gomega.Expect(OVNKubernetesFeature.EnableMultiNetworkPolicy).To(gomega.BeFalse()) + gomega.Expect(OVNKubernetesFeature.EnableInterconnect).To(gomega.BeFalse()) for _, a := range []OvnAuthConfig{OvnNorth, OvnSouth} { gomega.Expect(a.Scheme).To(gomega.Equal(OvnDBSchemeUnix)) @@ -549,6 +551,7 @@ var _ = Describe("Config Operations", func() { err = writeTestConfigFile(cfgFile.Name(), "kubeconfig="+kubeconfigFile, "cacert="+kubeCAFile, "enable-multi-network=true", "enable-multi-networkpolicy=true", + "enable-interconnect=true", "zone=foo", ) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -625,6 +628,7 @@ var _ = Describe("Config Operations", func() { gomega.Expect(OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout).To(gomega.Equal(3)) gomega.Expect(OVNKubernetesFeature.EgressIPNodeHealthCheckPort).To(gomega.Equal(1234)) gomega.Expect(OVNKubernetesFeature.EnableMultiNetwork).To(gomega.BeTrue()) + gomega.Expect(OVNKubernetesFeature.EnableInterconnect).To(gomega.BeTrue()) gomega.Expect(HybridOverlay.ClusterSubnets).To(gomega.Equal([]CIDRNetworkEntry{ {ovntest.MustParseIPNet("11.132.0.0/14"), 23}, })) @@ -712,6 +716,7 @@ var _ = Describe("Config Operations", func() { gomega.Expect(OVNKubernetesFeature.EgressIPNodeHealthCheckPort).To(gomega.Equal(4321)) gomega.Expect(OVNKubernetesFeature.EnableMultiNetwork).To(gomega.BeTrue()) gomega.Expect(OVNKubernetesFeature.EnableMultiNetworkPolicy).To(gomega.BeTrue()) + gomega.Expect(OVNKubernetesFeature.EnableInterconnect).To(gomega.BeTrue()) gomega.Expect(HybridOverlay.ClusterSubnets).To(gomega.Equal([]CIDRNetworkEntry{ {ovntest.MustParseIPNet("11.132.0.0/14"), 23}, })) @@ -772,6 +777,7 @@ var _ = Describe("Config Operations", func() { "-egressip-node-healthcheck-port=4321", "-enable-multi-network=true", "-enable-multi-networkpolicy=true", + "-enable-interconnect=true", "-healthz-bind-address=0.0.0.0:4321", } err = app.Run(cliArgs) diff --git a/go-controller/pkg/util/node_annotations.go b/go-controller/pkg/util/node_annotations.go index f861b9c7d8..109fdffb5c 100644 --- a/go-controller/pkg/util/node_annotations.go +++ b/go-controller/pkg/util/node_annotations.go @@ -81,6 +81,10 @@ const ( // ovnkube-node gets the node's zone from the OVN Southbound database. ovnNodeZoneName = "k8s.ovn.org/zone-name" + // ovnTransitSwitchPortAddr is the annotation to store the node Transit switch port ips. + // It is set by cluster manager. + ovnTransitSwitchPortAddr = "k8s.ovn.org/node-transit-switch-port-ifaddr" + // ovnNodeID is the id (of type integer) of a node. It is set by cluster-manager. ovnNodeID = "k8s.ovn.org/node-id" @@ -358,8 +362,10 @@ func SetNodePrimaryIfAddrs(nodeAnnotator kube.Annotator, ifAddrs []*net.IPNet) ( return nodeAnnotator.Set(ovnNodeIfAddr, primaryIfAddrAnnotation) } -// CreateNodeGatewayRouterLRPAddrAnnotation sets the IPv4 / IPv6 values of the node's Gatewary Router LRP to join switch. 
-func CreateNodeGatewayRouterLRPAddrAnnotation(nodeAnnotation map[string]interface{}, nodeIPNetv4, +// createPrimaryIfAddrAnnotation marshals the IPv4 / IPv6 values in the +// primaryIfAddrAnnotation format and stores it in the nodeAnnotation +// map with the provided 'annotationName' as key +func createPrimaryIfAddrAnnotation(annotationName string, nodeAnnotation map[string]interface{}, nodeIPNetv4, nodeIPNetv6 *net.IPNet) (map[string]interface{}, error) { if nodeAnnotation == nil { nodeAnnotation = make(map[string]interface{}) @@ -375,14 +381,30 @@ func CreateNodeGatewayRouterLRPAddrAnnotation(nodeAnnotation map[string]interfac if err != nil { return nil, err } - nodeAnnotation[ovnNodeGRLRPAddr] = string(bytes) + nodeAnnotation[annotationName] = string(bytes) return nodeAnnotation, nil } +// CreateNodeGatewayRouterLRPAddrAnnotation sets the IPv4 / IPv6 values of the node's Gatewary Router LRP to join switch. +func CreateNodeGatewayRouterLRPAddrAnnotation(nodeAnnotation map[string]interface{}, nodeIPNetv4, + nodeIPNetv6 *net.IPNet) (map[string]interface{}, error) { + return createPrimaryIfAddrAnnotation(ovnNodeGRLRPAddr, nodeAnnotation, nodeIPNetv4, nodeIPNetv6) +} + func NodeGatewayRouterLRPAddrAnnotationChanged(oldNode, newNode *corev1.Node) bool { return oldNode.Annotations[ovnNodeGRLRPAddr] != newNode.Annotations[ovnNodeGRLRPAddr] } +// CreateNodeTransitSwitchPortAddrAnnotation creates the node annotation for the node's Transit switch port addresses. +func CreateNodeTransitSwitchPortAddrAnnotation(nodeAnnotation map[string]interface{}, nodeIPNetv4, + nodeIPNetv6 *net.IPNet) (map[string]interface{}, error) { + return createPrimaryIfAddrAnnotation(ovnTransitSwitchPortAddr, nodeAnnotation, nodeIPNetv4, nodeIPNetv6) +} + +func NodeTransitSwitchPortAddrAnnotationChanged(oldNode, newNode *corev1.Node) bool { + return oldNode.Annotations[ovnTransitSwitchPortAddr] != newNode.Annotations[ovnTransitSwitchPortAddr] +} + const UnlimitedNodeCapacity = math.MaxInt32 type ifAddr struct { @@ -469,38 +491,51 @@ func ParseNodeGatewayRouterLRPAddr(node *kapi.Node) (net.IP, error) { return ip, nil } -// ParseNodeGatewayRouterLRPAddrs returns the IPv4 and/or IPv6 addresses for the node's gateway router port -// stored in the 'ovnNodeGRLRPAddr' annotation -func ParseNodeGatewayRouterLRPAddrs(node *kapi.Node) ([]*net.IPNet, error) { - nodeIfAddrAnnotation, ok := node.Annotations[ovnNodeGRLRPAddr] +// parsePrimaryIfAddrAnnotation unmarshals the IPv4 / IPv6 values in the +// primaryIfAddrAnnotation format from the nodeAnnotation map with the +// provided 'annotationName' as key and returns the addresses. 
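The refactor above funnels both the gateway-router LRP and transit-switch annotations through one marshal/parse pair. A self-contained sketch of the parse path it describes: unmarshal the payload, then ParseCIDR each non-empty family, keeping the host IP with the subnet's mask (types illustrative, not the repo's):

package main

import (
	"encoding/json"
	"fmt"
	"net"
)

type ifAddr struct {
	IPv4 string `json:"ipv4,omitempty"`
	IPv6 string `json:"ipv6,omitempty"`
}

// parseIfAddr round-trips the annotation payload back into IPNets.
func parseIfAddr(raw string) ([]*net.IPNet, error) {
	var a ifAddr
	if err := json.Unmarshal([]byte(raw), &a); err != nil {
		return nil, err
	}
	var out []*net.IPNet
	for _, s := range []string{a.IPv4, a.IPv6} {
		if s == "" {
			continue
		}
		ip, ipNet, err := net.ParseCIDR(s)
		if err != nil {
			return nil, err
		}
		out = append(out, &net.IPNet{IP: ip, Mask: ipNet.Mask})
	}
	return out, nil
}

func main() {
	addrs, err := parseIfAddr(`{"ipv4":"168.254.0.3/16","ipv6":"fd97::3/64"}`)
	fmt.Println(addrs, err) // [168.254.0.3/16 fd97::3/64] <nil>
}
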
+func parsePrimaryIfAddrAnnotation(node *kapi.Node, annotationName string) ([]*net.IPNet, error) { + nodeIfAddrAnnotation, ok := node.Annotations[annotationName] if !ok { - return nil, newAnnotationNotSetError("%s annotation not found for node %q", ovnNodeGRLRPAddr, node.Name) + return nil, newAnnotationNotSetError("%s annotation not found for node %q", annotationName, node.Name) } nodeIfAddr := primaryIfAddrAnnotation{} if err := json.Unmarshal([]byte(nodeIfAddrAnnotation), &nodeIfAddr); err != nil { - return nil, fmt.Errorf("failed to unmarshal annotation: %s for node %q, err: %v", ovnNodeGRLRPAddr, node.Name, err) + return nil, fmt.Errorf("failed to unmarshal annotation: %s for node %q, err: %w", annotationName, node.Name, err) } if nodeIfAddr.IPv4 == "" && nodeIfAddr.IPv6 == "" { return nil, fmt.Errorf("node: %q does not have any IP information set", node.Name) } - var gwLRPAddrs []*net.IPNet + var ipAddrs []*net.IPNet if nodeIfAddr.IPv4 != "" { ip, ipNet, err := net.ParseCIDR(nodeIfAddr.IPv4) if err != nil { - return nil, fmt.Errorf("failed to parse IPv4 address %s from annotation: %s for node %q, err: %v", nodeIfAddr.IPv4, ovnNodeGRLRPAddr, node.Name, err) + return nil, fmt.Errorf("failed to parse IPv4 address %s from annotation: %s for node %q, err: %w", nodeIfAddr.IPv4, annotationName, node.Name, err) } - gwLRPAddrs = append(gwLRPAddrs, &net.IPNet{IP: ip, Mask: ipNet.Mask}) + ipAddrs = append(ipAddrs, &net.IPNet{IP: ip, Mask: ipNet.Mask}) } if nodeIfAddr.IPv6 != "" { ip, ipNet, err := net.ParseCIDR(nodeIfAddr.IPv6) if err != nil { - return nil, fmt.Errorf("failed to parse IPv6 address %s from annotation: %s for node %q, err: %v", nodeIfAddr.IPv6, ovnNodeGRLRPAddr, node.Name, err) + return nil, fmt.Errorf("failed to parse IPv6 address %s from annotation: %s for node %q, err: %w", nodeIfAddr.IPv6, annotationName, node.Name, err) } - gwLRPAddrs = append(gwLRPAddrs, &net.IPNet{IP: ip, Mask: ipNet.Mask}) + ipAddrs = append(ipAddrs, &net.IPNet{IP: ip, Mask: ipNet.Mask}) } - return gwLRPAddrs, nil + return ipAddrs, nil +} + +// ParseNodeGatewayRouterLRPAddrs returns the IPv4 and/or IPv6 addresses for the node's gateway router port +// stored in the 'ovnNodeGRLRPAddr' annotation +func ParseNodeGatewayRouterLRPAddrs(node *kapi.Node) ([]*net.IPNet, error) { + return parsePrimaryIfAddrAnnotation(node, ovnNodeGRLRPAddr) +} + +// ParseNodeTransitSwitchPortAddrs returns the IPv4 and/or IPv6 addresses for the node's transit switch port +// stored in the 'ovnTransitSwitchPortAddr' annotation +func ParseNodeTransitSwitchPortAddrs(node *kapi.Node) ([]*net.IPNet, error) { + return parsePrimaryIfAddrAnnotation(node, ovnTransitSwitchPortAddr) } // ParseCloudEgressIPConfig returns the cloud's information concerning the node's primary network interface From 81fd01e708543d57b24a3243ade23781dbb44cee Mon Sep 17 00:00:00 2001 From: Numan Siddique Date: Tue, 28 Jun 2022 13:24:12 -0400 Subject: [PATCH 06/90] network-controller-manager: Create interconnect resources. This patch creates interconnect resources in the Northbound database to inter-connect the configured zones if interconnect feature is enabled. ovnkube network controller manager is expected to run on each configured zone. Network controller manager does the similar job to that of the `ovn-ic` service provided by OVN. An interconnect Transit switch is created and it is connected to the OVN cluster router. For each local node of a zone - a pair of logical switch port - logical router port is created. 
Logical switch port is created on the Transit switch and the router port is created on the OVN cluster router. For each remote node (not part of the local zone) - a remote chassis entry is created in the Southbound database - a remote port is created in the Transit switch and this remote port is bound to the remote chassis. - A static route is added in the OVN cluster router to send the traffic to the Transit switch if the packet is destined to the remote node. This facilitates the packet traversal from a local zone node to a remote zone node. When the packet is received by the remote node, it resumes at the egress stage of the Transit switch. From the Transit switch the packet enters the OVN cluster router, then the node logical switch, and finally reaches the destination pod. Interconnection is presently restricted to the default network. With this patch, multiple zones are supported for the default network with a few features still missing. Upcoming commits will add that support. Co-authored-by: Dumitru Ceara Signed-off-by: Dumitru Ceara Signed-off-by: Numan Siddique --- go-controller/pkg/libovsdb/libovsdb.go | 2 + go-controller/pkg/libovsdbops/chassis.go | 49 ++ go-controller/pkg/libovsdbops/model.go | 25 + go-controller/pkg/libovsdbops/portbinding.go | 34 + .../node/default_node_network_controller.go | 3 + .../default_node_network_controller_test.go | 6 + .../pkg/ovn/default_network_controller.go | 39 +- go-controller/pkg/ovn/master.go | 88 +- .../ovn/zone_interconnect/chassis_handler.go | 139 +++ .../zone_interconnect/chassis_handler_test.go | 320 +++++++ .../ovn/zone_interconnect/zone_ic_handler.go | 630 ++++++++++++++ .../zone_interconnect/zone_ic_handler_test.go | 819 ++++++++++++++++++ .../zone_interconnect_suit_test.go | 13 + go-controller/pkg/types/const.go | 4 + test/e2e/multicast.go | 4 +- 15 files changed, 2158 insertions(+), 17 deletions(-) create mode 100644 go-controller/pkg/libovsdbops/portbinding.go create mode 100644 go-controller/pkg/ovn/zone_interconnect/chassis_handler.go create mode 100644 go-controller/pkg/ovn/zone_interconnect/chassis_handler_test.go create mode 100644 go-controller/pkg/ovn/zone_interconnect/zone_ic_handler.go create mode 100644 go-controller/pkg/ovn/zone_interconnect/zone_ic_handler_test.go create mode 100644 go-controller/pkg/ovn/zone_interconnect/zone_interconnect_suit_test.go diff --git a/go-controller/pkg/libovsdb/libovsdb.go b/go-controller/pkg/libovsdb/libovsdb.go index 1be5aad6fb..ce2521f3c5 100644 --- a/go-controller/pkg/libovsdb/libovsdb.go +++ b/go-controller/pkg/libovsdb/libovsdb.go @@ -113,6 +113,8 @@ func NewSBClientWithConfig(cfg config.OvnAuthConfig, promRegistry prometheus.Reg client.WithTable(&sbdb.MACBinding{}), // used by node sync client.WithTable(&sbdb.Chassis{}), + // used by zone interconnect + client.WithTable(&sbdb.Encap{}), // used by node sync, only interested in names client.WithTable(&chassisPrivate, &chassisPrivate.Name), // used by node sync, only interested in Chassis reference diff --git a/go-controller/pkg/libovsdbops/chassis.go b/go-controller/pkg/libovsdbops/chassis.go index 799b9cafe5..b4026fbe67 100644 --- a/go-controller/pkg/libovsdbops/chassis.go +++ b/go-controller/pkg/libovsdbops/chassis.go @@ -28,6 +28,26 @@ func ListChassisPrivate(sbClient libovsdbclient.Client) ([]*sbdb.ChassisPrivate, return found, err } +// GetChassis looks up a chassis from the cache using the 'Name' column which is an indexed
+func GetChassis(sbClient libovsdbclient.Client, chassis *sbdb.Chassis) (*sbdb.Chassis, error) { + found := []*sbdb.Chassis{} + opModel := operationModel{ + Model: chassis, + ExistingResult: &found, + ErrNotFound: true, + BulkOp: false, + } + + m := newModelClient(sbClient) + err := m.Lookup(opModel) + if err != nil { + return nil, err + } + + return found[0], nil +} + // DeleteChassis deletes the provided chassis and associated private chassis func DeleteChassis(sbClient libovsdbclient.Client, chassis ...*sbdb.Chassis) error { opModels := make([]operationModel, 0, len(chassis)) @@ -115,3 +135,32 @@ func DeleteChassisWithPredicate(sbClient libovsdbclient.Client, p chassisPredica err := m.Delete(opModels...) return err } + +// CreateOrUpdateChassis creates or updates the chassis record along with the encap record +func CreateOrUpdateChassis(sbClient libovsdbclient.Client, chassis *sbdb.Chassis, encap *sbdb.Encap) error { + m := newModelClient(sbClient) + opModels := []operationModel{ + { + Model: encap, + DoAfter: func() { + encaps := append(chassis.Encaps, encap.UUID) + chassis.Encaps = sets.New(encaps...).UnsortedList() + }, + OnModelUpdates: onModelUpdatesAllNonDefault(), + ErrNotFound: false, + BulkOp: false, + }, + { + Model: chassis, + OnModelUpdates: onModelUpdatesAllNonDefault(), + ErrNotFound: false, + BulkOp: false, + }, + } + + if _, err := m.CreateOrUpdate(opModels...); err != nil { + return err + } + + return nil +} diff --git a/go-controller/pkg/libovsdbops/model.go b/go-controller/pkg/libovsdbops/model.go index 0a69ccdebf..c37fc4add2 100644 --- a/go-controller/pkg/libovsdbops/model.go +++ b/go-controller/pkg/libovsdbops/model.go @@ -56,6 +56,10 @@ func getUUID(model model.Model) string { return t.UUID case *sbdb.IGMPGroup: return t.UUID + case *sbdb.Encap: + return t.UUID + case *sbdb.PortBinding: + return t.UUID case *sbdb.MACBinding: return t.UUID case *sbdb.SBGlobal: @@ -113,6 +117,10 @@ func setUUID(model model.Model, uuid string) { t.UUID = uuid case *sbdb.IGMPGroup: t.UUID = uuid + case *sbdb.Encap: + t.UUID = uuid + case *sbdb.PortBinding: + t.UUID = uuid case *sbdb.MACBinding: t.UUID = uuid case *sbdb.SBGlobal: @@ -231,6 +239,19 @@ func copyIndexes(model model.Model) model.Model { return &sbdb.IGMPGroup{ UUID: t.UUID, } + case *sbdb.Encap: + return &sbdb.Encap{ + UUID: t.UUID, + Type: t.Type, + IP: t.IP, + } + case *sbdb.PortBinding: + return &sbdb.PortBinding{ + UUID: t.UUID, + LogicalPort: t.LogicalPort, + Datapath: t.Datapath, + TunnelKey: t.TunnelKey, + } case *sbdb.MACBinding: return &sbdb.MACBinding{ UUID: t.UUID, @@ -299,6 +320,10 @@ func getListFromModel(model model.Model) interface{} { return &[]*sbdb.ChassisPrivate{} case *sbdb.IGMPGroup: return &[]*sbdb.IGMPGroup{} + case *sbdb.Encap: + return &[]*sbdb.Encap{} + case *sbdb.PortBinding: + return &[]*sbdb.PortBinding{} case *sbdb.MACBinding: return &[]*sbdb.MACBinding{} case *nbdb.QoS: diff --git a/go-controller/pkg/libovsdbops/portbinding.go b/go-controller/pkg/libovsdbops/portbinding.go new file mode 100644 index 0000000000..4c5af3a063 --- /dev/null +++ b/go-controller/pkg/libovsdbops/portbinding.go @@ -0,0 +1,34 @@ +package libovsdbops + +import ( + "fmt" + + libovsdbclient "github.com/ovn-org/libovsdb/client" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/sbdb" +) + +// UpdatePortBindingSetChassis sets the chassis column of the 'portBinding' row so that the OVN thinks that +// the port binding 'portBinding' is bound on the chassis. 
Ideally its ovn-controller which claims/binds +// a port binding. But for a remote chassis, we have to bind it as we created the remote chassis +// record for the remote zone nodes. +// TODO (numans) remove this function once OVN supports binding a port binding for a remote +// chassis. +func UpdatePortBindingSetChassis(sbClient libovsdbclient.Client, portBinding *sbdb.PortBinding, chassis *sbdb.Chassis) error { + ch, err := GetChassis(sbClient, chassis) + if err != nil { + return fmt.Errorf("failed to get chassis id %s(%s), error: %v", chassis.Name, chassis.Hostname, err) + } + portBinding.Chassis = &ch.UUID + + opModel := operationModel{ + Model: portBinding, + OnModelUpdates: []interface{}{&portBinding.Chassis}, + ErrNotFound: true, + BulkOp: false, + } + + m := newModelClient(sbClient) + _, err = m.CreateOrUpdate(opModel) + return err +} diff --git a/go-controller/pkg/node/default_node_network_controller.go b/go-controller/pkg/node/default_node_network_controller.go index 6fcc665959..81e684c15c 100644 --- a/go-controller/pkg/node/default_node_network_controller.go +++ b/go-controller/pkg/node/default_node_network_controller.go @@ -290,6 +290,9 @@ func setupOVNNode(node *kapi.Node) error { fmt.Sprintf("other_config:bundle-idle-timeout=%d", config.Default.OpenFlowProbe), fmt.Sprintf("external_ids:hostname=\"%s\"", node.Name), + // If Interconnect feature is enabled, we want to tell ovn-controller to + // make this node/chassis as an interconnect gateway. + fmt.Sprintf("external_ids:ovn-is-interconn=%s", strconv.FormatBool(config.OVNKubernetesFeature.EnableInterconnect)), fmt.Sprintf("external_ids:ovn-monitor-all=%t", config.Default.MonitorAll), fmt.Sprintf("external_ids:ovn-ofctrl-wait-before-clear=%d", config.Default.OfctrlWaitBeforeClear), fmt.Sprintf("external_ids:ovn-enable-lflow-cache=%t", config.Default.LFlowCacheEnable), diff --git a/go-controller/pkg/node/default_node_network_controller_test.go b/go-controller/pkg/node/default_node_network_controller_test.go index 7eaac041ad..a644acf2f7 100644 --- a/go-controller/pkg/node/default_node_network_controller_test.go +++ b/go-controller/pkg/node/default_node_network_controller_test.go @@ -253,6 +253,7 @@ var _ = Describe("Node", func() { "external_ids:ovn-openflow-probe-interval=%d "+ "other_config:bundle-idle-timeout=%d "+ "external_ids:hostname=\"%s\" "+ + "external_ids:ovn-is-interconn=false "+ "external_ids:ovn-monitor-all=true "+ "external_ids:ovn-ofctrl-wait-before-clear=0 "+ "external_ids:ovn-enable-lflow-cache=true", @@ -315,6 +316,7 @@ var _ = Describe("Node", func() { "external_ids:ovn-openflow-probe-interval=%d "+ "other_config:bundle-idle-timeout=%d "+ "external_ids:hostname=\"%s\" "+ + "external_ids:ovn-is-interconn=false "+ "external_ids:ovn-monitor-all=true "+ "external_ids:ovn-ofctrl-wait-before-clear=0 "+ "external_ids:ovn-enable-lflow-cache=true", @@ -390,6 +392,7 @@ var _ = Describe("Node", func() { "external_ids:ovn-openflow-probe-interval=%d "+ "other_config:bundle-idle-timeout=%d "+ "external_ids:hostname=\"%s\" "+ + "external_ids:ovn-is-interconn=false "+ "external_ids:ovn-monitor-all=true "+ "external_ids:ovn-ofctrl-wait-before-clear=0 "+ "external_ids:ovn-enable-lflow-cache=false "+ @@ -457,6 +460,7 @@ var _ = Describe("Node", func() { "external_ids:ovn-openflow-probe-interval=%d "+ "other_config:bundle-idle-timeout=%d "+ "external_ids:hostname=\"%s\" "+ + "external_ids:ovn-is-interconn=false "+ "external_ids:ovn-monitor-all=true "+ "external_ids:ovn-ofctrl-wait-before-clear=0 "+ 
"external_ids:ovn-enable-lflow-cache=true", @@ -530,6 +534,7 @@ var _ = Describe("Node", func() { "external_ids:ovn-openflow-probe-interval=%d "+ "other_config:bundle-idle-timeout=%d "+ "external_ids:hostname=\"%s\" "+ + "external_ids:ovn-is-interconn=false "+ "external_ids:ovn-monitor-all=true "+ "external_ids:ovn-ofctrl-wait-before-clear=0 "+ "external_ids:ovn-enable-lflow-cache=true", @@ -603,6 +608,7 @@ var _ = Describe("Node", func() { "external_ids:ovn-openflow-probe-interval=%d "+ "other_config:bundle-idle-timeout=%d "+ "external_ids:hostname=\"%s\" "+ + "external_ids:ovn-is-interconn=false "+ "external_ids:ovn-monitor-all=true "+ "external_ids:ovn-ofctrl-wait-before-clear=0 "+ "external_ids:ovn-enable-lflow-cache=true", diff --git a/go-controller/pkg/ovn/default_network_controller.go b/go-controller/pkg/ovn/default_network_controller.go index cae05bf200..19fbaad6c1 100644 --- a/go-controller/pkg/ovn/default_network_controller.go +++ b/go-controller/pkg/ovn/default_network_controller.go @@ -24,6 +24,7 @@ import ( aclsyncer "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/external_ids_syncer/acl" addrsetsyncer "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/external_ids_syncer/address_set" lsm "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/logical_switch_manager" + zoneic "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/zone_interconnect" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/retry" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/syncmap" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" @@ -120,6 +121,7 @@ type DefaultNetworkController struct { addNodeFailed sync.Map nodeClusterRouterPortFailed sync.Map hybridOverlayFailed sync.Map + syncZoneICFailed sync.Map // retry framework for Cloud private IP config retryCloudPrivateIPConfig *retry.RetryFramework @@ -131,6 +133,14 @@ type DefaultNetworkController struct { // IP addresses of OVN Cluster logical router port ("GwRouterToJoinSwitchPrefix + OVNClusterRouter") // connecting to the join switch ovnClusterLRPToJoinIfAddrs []*net.IPNet + + // zoneICHandler creates the interconnect resources for local nodes and remote nodes. + // Interconnect resources are Transit switch and logical ports connecting this transit switch + // to the cluster router. Please see zone_interconnect/interconnect_handler.go for more details. + zoneICHandler *zoneic.ZoneInterconnectHandler + // zoneChassisHandler handles the local node and remote nodes in creating or updating the chassis entries in the OVN Southbound DB. + // Please see zone_interconnect/chassis_handler.go for more details. 
+ zoneChassisHandler *zoneic.ZoneChassisHandler } // NewDefaultNetworkController creates a new OVN controller for creating logical network @@ -157,6 +167,14 @@ func newDefaultNetworkControllerCommon(cnci *CommonNetworkControllerInfo, if err != nil { return nil, fmt.Errorf("unable to create new egress service controller while creating new default network controller: %w", err) } + + var zoneICHandler *zoneic.ZoneInterconnectHandler + var zoneChassisHandler *zoneic.ZoneChassisHandler + if config.OVNKubernetesFeature.EnableInterconnect { + zoneICHandler = zoneic.NewZoneInterconnectHandler(&util.DefaultNetInfo{}, cnci.nbClient, cnci.sbClient) + zoneChassisHandler = zoneic.NewZoneChassisHandler(cnci.sbClient) + } + oc := &DefaultNetworkController{ BaseNetworkController: BaseNetworkController{ CommonNetworkControllerInfo: *cnci, @@ -197,6 +215,8 @@ func newDefaultNetworkControllerCommon(cnci *CommonNetworkControllerInfo, svcController: svcController, svcFactory: svcFactory, egressSvcController: egressSvcController, + zoneICHandler: zoneICHandler, + zoneChassisHandler: zoneChassisHandler, } // Allocate IPs for logical router port "GwRouterToJoinSwitchPrefix + OVNClusterRouter". This should always @@ -696,14 +716,16 @@ func (h *defaultNetworkControllerEventHandler) AddResource(obj interface{}, from _, mgmtSync := h.oc.mgmtPortFailed.Load(node.Name) _, gwSync := h.oc.gatewaysFailed.Load(node.Name) _, hoSync := h.oc.hybridOverlayFailed.Load(node.Name) + _, zoneICSync := h.oc.syncZoneICFailed.Load(node.Name) nodeParams = &nodeSyncs{ nodeSync, clusterRtrSync, mgmtSync, gwSync, - hoSync} + hoSync, + zoneICSync} } else { - nodeParams = &nodeSyncs{true, true, true, true, config.HybridOverlay.Enabled} + nodeParams = &nodeSyncs{true, true, true, true, config.HybridOverlay.Enabled, config.OVNKubernetesFeature.EnableInterconnect} } if err = h.oc.addUpdateLocalNodeEvent(node, nodeParams); err != nil { @@ -712,7 +734,7 @@ func (h *defaultNetworkControllerEventHandler) AddResource(obj interface{}, from return err } } else { - if err = h.oc.addUpdateRemoteNodeEvent(node); err != nil { + if err = h.oc.addUpdateRemoteNodeEvent(node, config.OVNKubernetesFeature.EnableInterconnect); err != nil { return err } } @@ -849,22 +871,27 @@ func (h *defaultNetworkControllerEventHandler) UpdateResource(oldObj, newObj int nodeSubnetChanged(oldNode, newNode) || hostAddressesChanged(oldNode, newNode) || nodeGatewayMTUSupportChanged(oldNode, newNode)) _, hoSync := h.oc.hybridOverlayFailed.Load(newNode.Name) + _, syncZoneIC := h.oc.syncZoneICFailed.Load(newNode.Name) nodeSyncsParam = &nodeSyncs{ nodeSync, clusterRtrSync, mgmtSync, gwSync, - hoSync} + hoSync, + syncZoneIC} } else { klog.Infof("Node %s moved from the remote zone %s to local zone.", newNode.Name, util.GetNodeZone(oldNode), util.GetNodeZone(newNode)) // The node is now a local zone node. Trigger a full node sync. 
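// nodeSyncs now carries a trailing syncZoneIC field (see the struct in master.go), so the
// positional initializer below gains a sixth value, driven by config.OVNKubernetesFeature.EnableInterconnect.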
- nodeSyncsParam = &nodeSyncs{true, true, true, true, true} + nodeSyncsParam = &nodeSyncs{true, true, true, true, true, config.OVNKubernetesFeature.EnableInterconnect} } return h.oc.addUpdateLocalNodeEvent(newNode, nodeSyncsParam) } else { - return h.oc.addUpdateRemoteNodeEvent(newNode) + _, syncZoneIC := h.oc.syncZoneICFailed.Load(newNode.Name) + // Check if the node moved from local zone to remote zone and if so syncZoneIC should be set to true + syncZoneIC = syncZoneIC || h.oc.isLocalZoneNode(oldNode) + return h.oc.addUpdateRemoteNodeEvent(newNode, syncZoneIC) } case factory.EgressIPType: diff --git a/go-controller/pkg/ovn/master.go b/go-controller/pkg/ovn/master.go index 25e21bbad1..72180a47a1 100644 --- a/go-controller/pkg/ovn/master.go +++ b/go-controller/pkg/ovn/master.go @@ -466,7 +466,8 @@ func (oc *DefaultNetworkController) deleteStaleNodeChassis(node *kapi.Node) erro return nil } -func (oc *DefaultNetworkController) deleteNode(nodeName string) error { +// cleanupNodeResources deletes the node resources from the OVN Northbound database +func (oc *DefaultNetworkController) cleanupNodeResources(nodeName string) error { if err := oc.deleteNodeLogicalNetwork(nodeName); err != nil { return fmt.Errorf("error deleting node %s logical network: %v", nodeName, err) } @@ -603,8 +604,8 @@ func (oc *DefaultNetworkController) syncNodes(kNodes []interface{}) error { } for _, nodeSwitch := range nodeSwitches { if !foundNodes.Has(nodeSwitch.Name) { - if err := oc.deleteNode(nodeSwitch.Name); err != nil { - return fmt.Errorf("failed to delete node:%s, err:%v", nodeSwitch.Name, err) + if err := oc.cleanupNodeResources(nodeSwitch.Name); err != nil { + return fmt.Errorf("failed to cleanup node resources:%s, err:%v", nodeSwitch.Name, err) } } } @@ -612,6 +613,17 @@ func (oc *DefaultNetworkController) syncNodes(kNodes []interface{}) error { if err := oc.syncChassis(localZoneKNodes, remoteZoneKNodes); err != nil { return fmt.Errorf("failed to sync chassis: error: %v", err) } + + if config.OVNKubernetesFeature.EnableInterconnect { + if err := oc.zoneChassisHandler.SyncNodes(kNodes); err != nil { + return fmt.Errorf("zoneChassisHandler failed to sync nodes: error: %w", err) + } + + if err := oc.zoneICHandler.SyncNodes(kNodes); err != nil { + return fmt.Errorf("zoneICHandler failed to sync nodes: error: %w", err) + } + } + return nil } @@ -709,6 +721,7 @@ type nodeSyncs struct { syncMgmtPort bool syncGw bool syncHo bool + syncZoneIC bool } func (oc *DefaultNetworkController) addUpdateLocalNodeEvent(node *kapi.Node, nSyncs *nodeSyncs) error { @@ -742,6 +755,9 @@ func (oc *DefaultNetworkController) addUpdateLocalNodeEvent(node *kapi.Node, nSy oc.mgmtPortFailed.Store(node.Name, true) oc.gatewaysFailed.Store(node.Name, true) oc.hybridOverlayFailed.Store(node.Name, config.HybridOverlay.Enabled) + if nSyncs.syncZoneIC { + oc.syncZoneICFailed.Store(node.Name, true) + } err = fmt.Errorf("nodeAdd: error adding node %q: %w", node.Name, err) oc.recordNodeErrorEvent(node, err) return err @@ -822,26 +838,67 @@ func (oc *DefaultNetworkController) addUpdateLocalNodeEvent(node *kapi.Node, nSy } } + if nSyncs.syncZoneIC && config.OVNKubernetesFeature.EnableInterconnect { + // Call zone chassis handler's AddLocalZoneNode function to mark + // this node's chassis record in Southbound db as a local zone chassis. 
+ // This is required when a node moves from a remote zone to local zone + if err := oc.zoneChassisHandler.AddLocalZoneNode(node); err != nil { + errs = append(errs, err) + oc.syncZoneICFailed.Store(node.Name, true) + } else { + // Call zone IC handler's AddLocalZoneNode function to create + // interconnect resources in the OVN Northbound db for this local zone node. + if err := oc.zoneICHandler.AddLocalZoneNode(node); err != nil { + errs = append(errs, err) + oc.syncZoneICFailed.Store(node.Name, true) + } else { + oc.syncZoneICFailed.Delete(node.Name) + } + } + } + err = kerrors.NewAggregate(errs) if err != nil { oc.recordNodeErrorEvent(node, err) } + return err } -func (oc *DefaultNetworkController) addUpdateRemoteNodeEvent(node *kapi.Node) error { +func (oc *DefaultNetworkController) addUpdateRemoteNodeEvent(node *kapi.Node, syncZoneIC bool) error { // Check if the remote node is present in the local zone nodes. If its present // it means it moved from this controller zone to other remote zone. Cleanup the node // from the local zone cache. _, present := oc.localZoneNodes.Load(node.Name) if present { - klog.Infof("Node %q moved from the local zone %s to a remote zone %s. Deleting it locally", node.Name, oc.zone, util.GetNodeZone(node)) - if err := oc.deleteNodeEvent(node); err != nil { - return fmt.Errorf("error deleting the remote node %s, err : %w", node.Name, err) + klog.Infof("Node %q moved from the local zone %s to a remote zone %s. Cleaning the node resources", node.Name, oc.zone, util.GetNodeZone(node)) + if err := oc.cleanupNodeResources(node.Name); err != nil { + return fmt.Errorf("error cleaning up the local resources for the remote node %s, err : %w", node.Name, err) } } - return nil + + var err error + if syncZoneIC && config.OVNKubernetesFeature.EnableInterconnect { + // Call zone chassis handler's AddRemoteZoneNode function to creates + // the remote chassis for the remote zone node node in the SB DB or mark + // the entry as remote if it was local chassis earlier + if err = oc.zoneChassisHandler.AddRemoteZoneNode(node); err != nil { + err = fmt.Errorf("adding or updating remote node %s failed, err - %w", node.Name, err) + oc.syncZoneICFailed.Store(node.Name, true) + } else { + // Call zone IC handler's AddRemoteZoneNode function to create + // interconnect resources in the OVN Northbound db for this remote zone node. 
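// Ordering matters here: when the IC handler creates the remote transit switch port it binds it
// via UpdatePortBindingSetChassis, which looks up the chassis record that
// zoneChassisHandler.AddRemoteZoneNode created above, so it only runs once that update succeeded.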
+ if err = oc.zoneICHandler.AddRemoteZoneNode(node); err != nil { + err = fmt.Errorf("adding or updating remote node %s failed, err - %w", node.Name, err) + oc.syncZoneICFailed.Store(node.Name, true) + } else { + oc.syncZoneICFailed.Delete(node.Name) + } + } + } + + return err } func (oc *DefaultNetworkController) deleteNodeEvent(node *kapi.Node) error { @@ -862,16 +919,29 @@ func (oc *DefaultNetworkController) deleteNodeEvent(node *kapi.Node) error { } } - if err := oc.deleteNode(node.Name); err != nil { + if err := oc.cleanupNodeResources(node.Name); err != nil { return err } + if config.OVNKubernetesFeature.EnableInterconnect { + if err := oc.zoneICHandler.DeleteNode(node); err != nil { + return err + } + if !oc.isLocalZoneNode(node) { + if err := oc.zoneChassisHandler.DeleteRemoteZoneNode(node); err != nil { + return err + } + } + oc.syncZoneICFailed.Delete(node.Name) + } + oc.lsManager.DeleteSwitch(node.Name) oc.addNodeFailed.Delete(node.Name) oc.mgmtPortFailed.Delete(node.Name) oc.gatewaysFailed.Delete(node.Name) oc.nodeClusterRouterPortFailed.Delete(node.Name) oc.localZoneNodes.Delete(node.Name) + return nil } diff --git a/go-controller/pkg/ovn/zone_interconnect/chassis_handler.go b/go-controller/pkg/ovn/zone_interconnect/chassis_handler.go new file mode 100644 index 0000000000..95717fabeb --- /dev/null +++ b/go-controller/pkg/ovn/zone_interconnect/chassis_handler.go @@ -0,0 +1,139 @@ +package zoneinterconnect + +import ( + "errors" + "fmt" + "strconv" + "strings" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/sets" + + libovsdbclient "github.com/ovn-org/libovsdb/client" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdbops" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/sbdb" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" +) + +// ZoneChassisHandler creates chassis records for the remote zone nodes +// in the OVN Southbound DB. It also creates the encap records. +type ZoneChassisHandler struct { + sbClient libovsdbclient.Client +} + +// NewZoneChassisHandler returns a new ZoneChassisHandler instance +func NewZoneChassisHandler(sbClient libovsdbclient.Client) *ZoneChassisHandler { + return &ZoneChassisHandler{ + sbClient: sbClient, + } +} + +// AddLocalZoneNode marks the chassis entry for the node in the SB DB to a local chassis +func (zch *ZoneChassisHandler) AddLocalZoneNode(node *corev1.Node) error { + if err := zch.createOrUpdateNodeChassis(node, false); err != nil { + return fmt.Errorf("failed to update chassis to local for local node %s, error: %w", node.Name, err) + } + + return nil +} + +// AddRemoteZoneNode creates the remote chassis for the remote zone node node in the SB DB or marks +// the entry as remote if it was local chassis earlier. +func (zch *ZoneChassisHandler) AddRemoteZoneNode(node *corev1.Node) error { + if err := zch.createOrUpdateNodeChassis(node, true); err != nil { + return fmt.Errorf("failed to create or update chassis to remote for remote node %s, error: %w", node.Name, err) + } + + return nil +} + +// DeleteRemoteZoneNode deletes the remote chassis (if it exists) for the node. 
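// Only a chassis whose other_config "is-remote" is "true" is removed; a chassis that is (or has
// meanwhile become) local again is left untouched, as exercised by the "Delete remote zone node"
// test in chassis_handler_test.go.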
+func (zch *ZoneChassisHandler) DeleteRemoteZoneNode(node *corev1.Node) error { + chassisID, err := util.ParseNodeChassisIDAnnotation(node) + if err != nil { + return fmt.Errorf("failed to parse node chassis-id for node - %s, error: %w", node.Name, err) + } + + ch := &sbdb.Chassis{ + Name: chassisID, + Hostname: node.Name, + } + + chassis, err := libovsdbops.GetChassis(zch.sbClient, ch) + if err != nil { + if errors.Is(err, libovsdbclient.ErrNotFound) { + // Nothing to do + return nil + } + return fmt.Errorf("failed to get the chassis record for the remote zone node %s, error: %w", node.Name, err) + } + if chassis.OtherConfig != nil && strings.ToLower(chassis.OtherConfig["is-remote"]) == "true" { + // Its a remote chassis, delete it. + return libovsdbops.DeleteChassis(zch.sbClient, chassis) + } + + return nil +} + +// SyncNodes cleans up the remote chassis records in the OVN Southbound db +// for the stale nodes +func (zic *ZoneChassisHandler) SyncNodes(kNodes []interface{}) error { + chassis, err := libovsdbops.ListChassis(zic.sbClient) + + if err != nil { + return fmt.Errorf("failed to get the list of chassis from OVN Southbound db : %w", err) + } + + foundNodes := sets.New[string]() + for _, tmp := range kNodes { + node, ok := tmp.(*corev1.Node) + if !ok { + return fmt.Errorf("spurious object in syncNodes: %v", tmp) + } + foundNodes.Insert(node.Name) + } + + for _, ch := range chassis { + if ch.OtherConfig != nil && strings.ToLower(ch.OtherConfig["is-remote"]) == "true" { + if !foundNodes.Has(ch.Hostname) { + // Its a stale remote chassis, delete it. + if err = libovsdbops.DeleteChassis(zic.sbClient, ch); err != nil { + return fmt.Errorf("failed to delete remote stale chassis for node %s : %w", ch.Hostname, err) + } + } + } + } + + return nil +} + +// createOrUpdateNodeChassis creates or updates the node chassis to local or remote. +func (zch *ZoneChassisHandler) createOrUpdateNodeChassis(node *corev1.Node, isRemote bool) error { + // Get the chassis id. 
+ chassisID, err := util.ParseNodeChassisIDAnnotation(node) + if err != nil { + return fmt.Errorf("failed to parse node chassis-id for node - %s, error: %w", node.Name, err) + } + + nodePrimaryIp, err := util.GetNodePrimaryIP(node) + if err != nil { + return fmt.Errorf("failed to parse node %s primary IP %w", node.Name, err) + } + + chassis := sbdb.Chassis{ + Name: chassisID, + Hostname: node.Name, + OtherConfig: map[string]string{ + "is-remote": strconv.FormatBool(isRemote), + }, + } + + encap := sbdb.Encap{ + ChassisName: chassisID, + IP: nodePrimaryIp, + Type: "geneve", + Options: map[string]string{"csum": "true"}, + } + + return libovsdbops.CreateOrUpdateChassis(zch.sbClient, &chassis, &encap) +} diff --git a/go-controller/pkg/ovn/zone_interconnect/chassis_handler_test.go b/go-controller/pkg/ovn/zone_interconnect/chassis_handler_test.go new file mode 100644 index 0000000000..2f1f1a4192 --- /dev/null +++ b/go-controller/pkg/ovn/zone_interconnect/chassis_handler_test.go @@ -0,0 +1,320 @@ +package zoneinterconnect + +import ( + "github.com/onsi/ginkgo" + "github.com/onsi/gomega" + + "github.com/urfave/cli/v2" + corev1 "k8s.io/api/core/v1" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + libovsdbclient "github.com/ovn-org/libovsdb/client" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdbops" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/sbdb" + libovsdbtest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/libovsdb" +) + +var _ = ginkgo.Describe("Zone Interconnect Chassis Operations", func() { + var ( + app *cli.App + libovsdbCleanup *libovsdbtest.Cleanup + testNode1 corev1.Node + testNode2 corev1.Node + testNode3 corev1.Node + node1Chassis sbdb.Chassis + node2Chassis sbdb.Chassis + node3Chassis sbdb.Chassis + initialSBDB []libovsdbtest.TestData + ) + + const ( + clusterIPNet string = "10.1.0.0" + clusterCIDR string = clusterIPNet + "/16" + joinSubnetCIDR string = "100.64.0.0/16/19" + vlanID = 1024 + ) + + ginkgo.BeforeEach(func() { + // Restore global default values before each testcase + //gomega.Expect(config.PrepareTestConfig()).To(gomega.Succeed()) + + app = cli.NewApp() + app.Name = "test" + app.Flags = config.Flags + libovsdbCleanup = nil + + node1Chassis = sbdb.Chassis{Name: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac6", Hostname: "node1", UUID: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac6"} + node2Chassis = sbdb.Chassis{Name: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac7", Hostname: "node2", UUID: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac7"} + node3Chassis = sbdb.Chassis{Name: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac8", Hostname: "node3", UUID: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac8"} + + testNode1 = corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + Annotations: map[string]string{"k8s.ovn.org/node-chassis-id": "cb9ec8fa-b409-4ef3-9f42-d9283c47aac6"}, + }, + Status: corev1.NodeStatus{ + Addresses: []corev1.NodeAddress{{Type: corev1.NodeInternalIP, Address: "10.0.0.10"}}, + }, + } + testNode2 = corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node2", + Annotations: map[string]string{"k8s.ovn.org/node-chassis-id": "cb9ec8fa-b409-4ef3-9f42-d9283c47aac7"}, + }, + Status: corev1.NodeStatus{ + Addresses: []corev1.NodeAddress{{Type: corev1.NodeInternalIP, Address: "10.0.0.11"}}, + }, + } + testNode3 = corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node3", + Annotations: map[string]string{"k8s.ovn.org/node-chassis-id": "cb9ec8fa-b409-4ef3-9f42-d9283c47aac8"}, + }, + Status: corev1.NodeStatus{ 
+ Addresses: []corev1.NodeAddress{{Type: corev1.NodeInternalIP, Address: "10.0.0.12"}}, + }, + } + + initialSBDB = []libovsdbtest.TestData{ + &node1Chassis, &node2Chassis} + }) + + ginkgo.AfterEach(func() { + if libovsdbCleanup != nil { + libovsdbCleanup.Cleanup() + } + }) + + ginkgo.It("chassis is-remote check", func() { + app.Action = func(ctx *cli.Context) error { + dbSetup := libovsdbtest.TestSetup{ + SBData: initialSBDB, + } + + _, err := config.InitConfig(ctx, nil, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + config.Kubernetes.HostNetworkNamespace = "" + + var libovsdbOvnSBClient libovsdbclient.Client + _, libovsdbOvnSBClient, libovsdbCleanup, err = libovsdbtest.NewNBSBTestHarness(dbSetup) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + zoneChassisHandler := NewZoneChassisHandler(libovsdbOvnSBClient) + err = zoneChassisHandler.AddLocalZoneNode(&testNode1) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Check the SB Chassis. + nodeCh, err := libovsdbops.GetChassis(libovsdbOvnSBClient, &node1Chassis) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(nodeCh.OtherConfig).Should(gomega.HaveKeyWithValue("is-remote", "false")) + + err = zoneChassisHandler.AddLocalZoneNode(&testNode2) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + nodeCh, err = libovsdbops.GetChassis(libovsdbOvnSBClient, &node2Chassis) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(nodeCh.OtherConfig).Should(gomega.HaveKeyWithValue("is-remote", "false")) + + err = zoneChassisHandler.AddRemoteZoneNode(&testNode3) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + nodeCh, err = libovsdbops.GetChassis(libovsdbOvnSBClient, &node3Chassis) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(nodeCh.OtherConfig).Should(gomega.HaveKeyWithValue("is-remote", "true")) + return nil + } + + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR, + "-init-cluster-manager", + "-zone-join-switch-subnets=" + joinSubnetCIDR, + "-enable-interconnect", + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("Move chassis zone", func() { + app.Action = func(ctx *cli.Context) error { + dbSetup := libovsdbtest.TestSetup{ + SBData: initialSBDB, + } + + _, err := config.InitConfig(ctx, nil, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + config.Kubernetes.HostNetworkNamespace = "" + + var libovsdbOvnSBClient libovsdbclient.Client + _, libovsdbOvnSBClient, libovsdbCleanup, err = libovsdbtest.NewNBSBTestHarness(dbSetup) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + zoneChassisHandler := NewZoneChassisHandler(libovsdbOvnSBClient) + err = zoneChassisHandler.AddLocalZoneNode(&testNode1) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Check the SB Chassis. + nodeCh, err := libovsdbops.GetChassis(libovsdbOvnSBClient, &node1Chassis) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(nodeCh.OtherConfig).Should(gomega.HaveKeyWithValue("is-remote", "false")) + + // Move the node1 chassis to remote + err = zoneChassisHandler.AddRemoteZoneNode(&testNode1) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Check the SB Chassis. 
+ nodeCh, err = libovsdbops.GetChassis(libovsdbOvnSBClient, &node1Chassis) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(nodeCh.OtherConfig).Should(gomega.HaveKeyWithValue("is-remote", "true")) + + // Move the chassis back to local zone + err = zoneChassisHandler.AddLocalZoneNode(&testNode1) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Check the SB Chassis. + nodeCh, err = libovsdbops.GetChassis(libovsdbOvnSBClient, &node1Chassis) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(nodeCh.OtherConfig).Should(gomega.HaveKeyWithValue("is-remote", "false")) + + return nil + } + + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR, + "-init-cluster-manager", + "-zone-join-switch-subnets=" + joinSubnetCIDR, + "-enable-interconnect", + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("Delete remote zone node", func() { + app.Action = func(ctx *cli.Context) error { + dbSetup := libovsdbtest.TestSetup{ + SBData: initialSBDB, + } + + _, err := config.InitConfig(ctx, nil, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + config.Kubernetes.HostNetworkNamespace = "" + + var libovsdbOvnSBClient libovsdbclient.Client + _, libovsdbOvnSBClient, libovsdbCleanup, err = libovsdbtest.NewNBSBTestHarness(dbSetup) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + zoneChassisHandler := NewZoneChassisHandler(libovsdbOvnSBClient) + err = zoneChassisHandler.AddLocalZoneNode(&testNode1) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Check the SB Chassis. + nodeCh, err := libovsdbops.GetChassis(libovsdbOvnSBClient, &node1Chassis) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(nodeCh.OtherConfig).Should(gomega.HaveKeyWithValue("is-remote", "false")) + + // Call DeleteRemoteZoneNode for local zone node. The chassis entry should be still there + // as its not a remote zone node. + err = zoneChassisHandler.DeleteRemoteZoneNode(&testNode1) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Check the SB Chassis. + nodeCh, err = libovsdbops.GetChassis(libovsdbOvnSBClient, &node1Chassis) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(nodeCh.OtherConfig).Should(gomega.HaveKeyWithValue("is-remote", "false")) + + // Make the testNode1 as remote zone + err = zoneChassisHandler.AddRemoteZoneNode(&testNode1) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Check the SB Chassis. + nodeCh, err = libovsdbops.GetChassis(libovsdbOvnSBClient, &node1Chassis) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(nodeCh.OtherConfig).Should(gomega.HaveKeyWithValue("is-remote", "true")) + + // Call DeleteRemoteZoneNode for remote zone node. The chassis entry should be deleted + err = zoneChassisHandler.DeleteRemoteZoneNode(&testNode1) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // Check the SB Chassis. 
+ _, err = libovsdbops.GetChassis(libovsdbOvnSBClient, &node1Chassis) + gomega.Expect(err).To(gomega.HaveOccurred()) + + return nil + } + + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR, + "-init-cluster-manager", + "-zone-join-switch-subnets=" + joinSubnetCIDR, + "-enable-interconnect", + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("Sync nodes", func() { + app.Action = func(ctx *cli.Context) error { + dbSetup := libovsdbtest.TestSetup{ + SBData: initialSBDB, + } + + _, err := config.InitConfig(ctx, nil, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + config.Kubernetes.HostNetworkNamespace = "" + + var libovsdbOvnSBClient libovsdbclient.Client + _, libovsdbOvnSBClient, libovsdbCleanup, err = libovsdbtest.NewNBSBTestHarness(dbSetup) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + zoneChassisHandler := NewZoneChassisHandler(libovsdbOvnSBClient) + err = zoneChassisHandler.AddLocalZoneNode(&testNode1) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Check the SB Chassis. + nodeCh, err := libovsdbops.GetChassis(libovsdbOvnSBClient, &node1Chassis) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(nodeCh.OtherConfig).Should(gomega.HaveKeyWithValue("is-remote", "false")) + + err = zoneChassisHandler.AddRemoteZoneNode(&testNode2) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + nodeCh, err = libovsdbops.GetChassis(libovsdbOvnSBClient, &node2Chassis) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(nodeCh.OtherConfig).Should(gomega.HaveKeyWithValue("is-remote", "true")) + + err = zoneChassisHandler.AddRemoteZoneNode(&testNode3) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + nodeCh, err = libovsdbops.GetChassis(libovsdbOvnSBClient, &node3Chassis) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(nodeCh.OtherConfig).Should(gomega.HaveKeyWithValue("is-remote", "true")) + + // Call ICHandler SyncNodes function removing the testNode3 from the list of nodes. + // Chassis record for testNode3 should be cleaned up SyncNodes. 
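// Note: the sync exercised below is ZoneChassisHandler.SyncNodes, which only prunes stale remote
// chassis records; stale transit switch ports are cleaned up separately by
// ZoneInterconnectHandler.SyncNodes.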
+ var kNodes []interface{} + kNodes = append(kNodes, &testNode1) + kNodes = append(kNodes, &testNode2) + err = zoneChassisHandler.SyncNodes(kNodes) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + nodeCh, err = libovsdbops.GetChassis(libovsdbOvnSBClient, &node3Chassis) + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(nodeCh).To(gomega.BeNil()) + + // chassis entries for testNode1 and testNode2 should be present + nodeCh, err = libovsdbops.GetChassis(libovsdbOvnSBClient, &node1Chassis) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(nodeCh.OtherConfig).Should(gomega.HaveKeyWithValue("is-remote", "false")) + + nodeCh, err = libovsdbops.GetChassis(libovsdbOvnSBClient, &node2Chassis) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(nodeCh.OtherConfig).Should(gomega.HaveKeyWithValue("is-remote", "true")) + + return nil + } + + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR, + "-init-cluster-manager", + "-zone-join-switch-subnets=" + joinSubnetCIDR, + "-enable-interconnect", + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) +}) diff --git a/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler.go b/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler.go new file mode 100644 index 0000000000..0af8c4c64d --- /dev/null +++ b/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler.go @@ -0,0 +1,630 @@ +package zoneinterconnect + +import ( + "errors" + "fmt" + "net" + "strconv" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/klog/v2" + utilnet "k8s.io/utils/net" + + libovsdbclient "github.com/ovn-org/libovsdb/client" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdbops" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/sbdb" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" +) + +const ( + transitSwitchTunnelKey = "16711683" + lportTypeRouter = "router" + lportTypeRouterAddr = "router" + lportTypeRemote = "remote" +) + +/* + * ZoneInterconnectHandler creates the OVN resources required for interconnecting + * multiple zones. This handler exposes 2 main functions which a network controller + * (default and secondary) is expected to call for a node event. + * AddLocalZoneNode(node) should be called if the node 'node' is a local zone node. + * AddRemoteZoneNode(node) should be called if the node 'node' is a remote zone node. + * Zone Interconnect Handler first creates a transit switch with the name - + "_" + types.TransitSwitch + * if it is still not present. + * + * Local zone node handling + * ------------------------ + * When network controller calls AddLocalZoneNode(ovn-worker) + * - A logical switch port - router port pair is created connecting the ovn_cluster_router + * to the transit switch. + * - Node annotation - k8s.ovn.org/ovn-node-transit-switch-port-ifaddr value is used + * as the logical router port address + * + * When network controller calls AddRemoteZoneNode(ovn-worker3) + * - A logical switch port of type "remote" is created in OVN Northbound transit_switch + * for the node ovn-worker3 + * - A static route {IPPrefix: "ovn-worker3_subnet", Nexthop: "ovn-worker3_transit_port_ip"} is + * added in the ovn_cluster_router. 
+ * - For the default network, additional static route + * {IPPrefix: "ovn-worker3_gw_router_port_host_ip", Nexthop: "ovn-worker3_transit_port_ip"} is + * added in the ovn_cluster_router + * - The corresponding port binding row in OVN Southbound DB for this logical port + * is manually bound to the remote OVN Southbound DB Chassis "ovn-worker3" + * + * ----------------------------------------------------------------------------------------------------- + * $ ovn-nbctl show ovn_cluster_router (on ovn-worker zone DB) + * router ovn_cluster_router + * ... + * port rtots-ovn-worker + * mac: "0a:58:a8:fe:00:08" + * networks: ["168.254.0.8/16", "fd97::8/64"] + * + * $ ovn-nbctl show transit_switch + * port tstor-ovn-worker + * type: router + * router-port: rtots-ovn-worker + * port tstor-ovn-worker3 + * type: remote + * addresses: ["0a:58:a8:fe:00:02 168.254.0.2/16 fd97::2/64"] + * + * $ ovn-nbctl lr-route-list ovn_cluster_router + * IPv4 Routes + * Route Table
<main>: + * ... + * ... + * 10.244.0.0/24 (ovn-worker3 subnet) 168.254.0.2 (ovn-worker3 transit switch port ip) dst-ip + * 100.64.0.2/32 (ovn-worker3 gw router port ip) 168.254.0.2 dst-ip + * ... + * IPv6 Routes + * Route Table <main>
: + * ... + * ... + * fd00:10:244:1::/64 (ovn-worker3 subnet) fd97::2 (ovn-worker3 transit switch port ip) dst-ip + * fd98::2 (ovn-worker3 gw router port ip) fd97::2 dst-ip + * ... + * + * $ ovn-sbctl show + * ... + * Chassis "c391c626-e1f0-4b1e-af0b-66f0807f9495" + * hostname: ovn-worker3 (Its a remote chassis entry on which tstor-ovn-worker3 is bound) + * Encap geneve + * ip: "10.89.0.26" + * options: {csum="true"} + * Port_Binding tstor-ovn-worker3 + * + * ----------------------------------------------------------------------------------------------------- + * + * Note that the Chassis entry for each remote zone node is created by ZoneChassisHandler + * + */ + +// ZoneInterconnectHandler creates the OVN resources required for interconnecting +// multiple zones for a network (default or secondary layer 3) +type ZoneInterconnectHandler struct { + // network which is inter-connected + util.NetInfo + nbClient libovsdbclient.Client + sbClient libovsdbclient.Client + // ovn_cluster_router name for the network + networkClusterRouterName string + // transit switch name for the network + networkTransitSwitchName string +} + +// NewZoneInterconnectHandler returns a new ZoneInterconnectHandler object +func NewZoneInterconnectHandler(nInfo util.NetInfo, nbClient, sbClient libovsdbclient.Client) *ZoneInterconnectHandler { + zic := &ZoneInterconnectHandler{ + NetInfo: nInfo, + nbClient: nbClient, + sbClient: sbClient, + } + + zic.networkClusterRouterName = zic.getNetworkScopedName(types.OVNClusterRouter) + zic.networkTransitSwitchName = zic.getNetworkScopedName(types.TransitSwitch) + + return zic +} + +// AddLocalZoneNode creates the interconnect resources in OVN NB DB for the local zone node. +// See createLocalZoneNodeResources() below for more details. +func (zic *ZoneInterconnectHandler) AddLocalZoneNode(node *corev1.Node) error { + klog.Infof("Creating interconnect resources for local zone node %s for the network %s", node.Name, zic.GetNetworkName()) + nodeID := util.GetNodeID(node) + if nodeID == -1 { + // Don't consider this node as cluster-manager has not allocated node id yet. + return fmt.Errorf("failed to get node id for node - %s", node.Name) + } + + if err := zic.createLocalZoneNodeResources(node, nodeID); err != nil { + return fmt.Errorf("creating interconnect resources for local zone node %s for the network %s failed : err - %w", node.Name, zic.GetNetworkName(), err) + } + + return nil +} + +// AddRemoteZoneNode creates the interconnect resources in OVN NB DB for the remote zone node. +// // See createRemoteZoneNodeResources() below for more details. +func (zic *ZoneInterconnectHandler) AddRemoteZoneNode(node *corev1.Node) error { + klog.Infof("Creating interconnect resources for remote zone node %s for the network %s", node.Name, zic.GetNetworkName()) + + nodeID := util.GetNodeID(node) + if nodeID == -1 { + // Don't consider this node as cluster-manager has not allocated node id yet. + return fmt.Errorf("failed to get node id for node - %s", node.Name) + } + + // Get the chassis id. 
+ chassisId, err := util.ParseNodeChassisIDAnnotation(node) + if err != nil { + return fmt.Errorf("failed to parse node chassis-id for node - %s, error: %w", node.Name, err) + } + + if err := zic.createRemoteZoneNodeResources(node, nodeID, chassisId); err != nil { + return fmt.Errorf("creating interconnect resources for remote zone node %s for the network %s failed : err - %w", node.Name, zic.GetNetworkName(), err) + } + + return nil +} + +// DeleteNode deletes the local zone node or remote zone node resources +func (zic *ZoneInterconnectHandler) DeleteNode(node *corev1.Node) error { + klog.Infof("Deleting interconnect resources for the node %s for the network %s", node.Name, zic.GetNetworkName()) + + return zic.cleanupNode(node.Name) +} + +// SyncNodes cleans up the interconnect resources present in the OVN Northbound db +// for the stale nodes +func (zic *ZoneInterconnectHandler) SyncNodes(kNodes []interface{}) error { + // Get the transit switch. If its not present no cleanup to do + ts := &nbdb.LogicalSwitch{ + Name: zic.networkTransitSwitchName, + } + + ts, err := libovsdbops.GetLogicalSwitch(zic.nbClient, ts) + if err != nil { + if errors.Is(err, libovsdbclient.ErrNotFound) { + // Nothing to do as there is no transit switch. This can happen for the first time + // when interconnect is enabled. + return nil + } + + return err + } + + foundNodes := sets.New[string]() + for _, tmp := range kNodes { + node, ok := tmp.(*corev1.Node) + if !ok { + return fmt.Errorf("spurious object in syncNodes: %v", tmp) + } + foundNodes.Insert(node.Name) + } + + staleNodes := []string{} + for _, p := range ts.Ports { + lp := &nbdb.LogicalSwitchPort{ + UUID: p, + } + + lp, err = libovsdbops.GetLogicalSwitchPort(zic.nbClient, lp) + if err != nil { + continue + } + + if lp.ExternalIDs == nil { + continue + } + + lportNode := lp.ExternalIDs["node"] + if !foundNodes.Has(lportNode) { + staleNodes = append(staleNodes, lportNode) + } + } + + for _, staleNode := range staleNodes { + if err := zic.cleanupNode(staleNode); err != nil { + klog.Errorf("Failed to cleanup the interconnect resources from OVN Northbound db for the stale node %s : %w", err) + } + } + + return nil +} + +// createLocalZoneNodeResources creates the local zone node resources for interconnect +// - creates Transit switch if it doesn't yet exit +// - creates a logical switch port of type "router" in the transit switch with the name as - .tstor- +// Eg. 
if the node name is ovn-worker and the network is default, the name would be - tstor-ovn-worker +// if the node name is ovn-worker and the network name is blue, the logical port name would be - blue.tstor-ovn-worker +// - creates a logical router port in the ovn_cluster_router with the name - .rtots- and connects +// to the node logical switch port in the transit switch +// - remove any stale static routes in the ovn_cluster_router for the node +func (zic *ZoneInterconnectHandler) createLocalZoneNodeResources(node *corev1.Node, nodeID int) error { + nodeTransitSwitchPortIPs, err := util.ParseNodeTransitSwitchPortAddrs(node) + if err != nil || len(nodeTransitSwitchPortIPs) == 0 { + return fmt.Errorf("failed to get the node transit switch port ips for node %s: %w", node.Name, err) + } + + transitRouterPortMac := util.IPAddrToHWAddr(nodeTransitSwitchPortIPs[0].IP) + var transitRouterPortNetworks []string + for _, ip := range nodeTransitSwitchPortIPs { + transitRouterPortNetworks = append(transitRouterPortNetworks, ip.String()) + } + + ts := &nbdb.LogicalSwitch{ + Name: zic.networkTransitSwitchName, + OtherConfig: map[string]string{ + "interconn-ts": zic.networkTransitSwitchName, + "requested-tnl-key": transitSwitchTunnelKey, + "mcast_snoop": "true", + "mcast_flood_unregistered": "true", + }, + } + + // Create transit switch if it doesn't exist + if err := libovsdbops.CreateOrUpdateLogicalSwitch(zic.nbClient, ts); err != nil { + return fmt.Errorf("failed to create/update transit switch %s: %w", zic.networkTransitSwitchName, err) + } + + // Connect transit switch to the cluster router by creating a pair of logical switch port - logical router port + logicalRouterPortName := zic.getNetworkScopedName(types.RouterToTransitSwitchPrefix + node.Name) + logicalRouterPort := nbdb.LogicalRouterPort{ + Name: logicalRouterPortName, + MAC: transitRouterPortMac.String(), + Networks: transitRouterPortNetworks, + Options: map[string]string{ + "mcast_flood": "true", + }, + } + logicalRouter := nbdb.LogicalRouter{ + Name: zic.networkClusterRouterName, + } + + if err := libovsdbops.CreateOrUpdateLogicalRouterPort(zic.nbClient, &logicalRouter, &logicalRouterPort, nil); err != nil { + return fmt.Errorf("failed to create/update cluster router %s to add transit switch port %s for the node %s: %w", zic.networkClusterRouterName, logicalRouterPortName, node.Name, err) + } + + lspOptions := map[string]string{ + "router-port": logicalRouterPortName, + "requested-tnl-key": strconv.Itoa(nodeID), + } + + // Store the node name in the external_ids column for book keeping + externalIDs := map[string]string{ + "node": node.Name, + } + err = zic.addNodeLogicalSwitchPort(zic.networkTransitSwitchName, zic.getNetworkScopedName(types.TransitSwitchToRouterPrefix+node.Name), + lportTypeRouter, []string{lportTypeRouterAddr}, lspOptions, externalIDs) + if err != nil { + return err + } + + // Its possible that node is moved from a remote zone to the local zone. Check and delete the remote zone routes + // for this node as it's no longer needed. + return zic.deleteLocalNodeStaticRoutes(node, nodeID, nodeTransitSwitchPortIPs) +} + +// createRemoteZoneNodeResources creates the remote zone node resources +// - creates Transit switch if it doesn't yet exit +// - creates a logical port of type "remote" in the transit switch with the name as - .tstor. +// Eg. 
if the node name is ovn-worker and the network is default, the name would be - tstor.ovn-worker +// if the node name is ovn-worker and the network name is blue, the logical port name would be - blue.tstor.ovn-worker +// - binds the remote port to the node remote chassis +// - adds static routes for the remote node via the remote port ip in the ovn_cluster_router +func (zic *ZoneInterconnectHandler) createRemoteZoneNodeResources(node *corev1.Node, nodeID int, chassisId string) error { + nodeTransitSwitchPortIPs, err := util.ParseNodeTransitSwitchPortAddrs(node) + if err != nil || len(nodeTransitSwitchPortIPs) == 0 { + return fmt.Errorf("failed to get the node transit switch port Ips : %w", err) + } + + ts := &nbdb.LogicalSwitch{ + Name: zic.networkTransitSwitchName, + OtherConfig: map[string]string{ + "interconn-ts": zic.networkTransitSwitchName, + "requested-tnl-key": transitSwitchTunnelKey, + "mcast_snoop": "true", + "mcast_flood_unregistered": "true", + }, + } + + // Create transit switch if it doesn't exist + if err := libovsdbops.CreateOrUpdateLogicalSwitch(zic.nbClient, ts); err != nil { + return fmt.Errorf("failed to create/update transit switch %s: %w", zic.networkTransitSwitchName, err) + } + + transitRouterPortMac := util.IPAddrToHWAddr(nodeTransitSwitchPortIPs[0].IP) + var transitRouterPortNetworks []string + for _, ip := range nodeTransitSwitchPortIPs { + transitRouterPortNetworks = append(transitRouterPortNetworks, ip.String()) + } + + remotePortAddr := transitRouterPortMac.String() + for _, tsNetwork := range transitRouterPortNetworks { + remotePortAddr = remotePortAddr + " " + tsNetwork + } + + lspOptions := map[string]string{ + "requested-tnl-key": strconv.Itoa(nodeID), + } + // Store the node name in the external_ids column for book keeping + externalIDs := map[string]string{ + "node": node.Name, + } + + remotePortName := zic.getNetworkScopedName(types.TransitSwitchToRouterPrefix + node.Name) + if err := zic.addNodeLogicalSwitchPort(zic.networkTransitSwitchName, remotePortName, lportTypeRemote, []string{remotePortAddr}, lspOptions, externalIDs); err != nil { + return err + } + // Set the port binding chassis. + if err := zic.setRemotePortBindingChassis(node.Name, remotePortName, chassisId); err != nil { + return err + } + + if err := zic.addRemoteNodeStaticRoutes(node, nodeTransitSwitchPortIPs); err != nil { + return err + } + + // Cleanup the logical router port connecting to the transit switch for the remote node (if present) + // Cleanup would be required when a local zone node moves to a remote zone. 
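// This mirrors createLocalZoneNodeResources above, which drops the remote-node static routes
// when a node moves in the other direction (remote zone to local zone).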
+ return zic.cleanupNodeClusterRouterPort(node.Name) +} + +func (zic *ZoneInterconnectHandler) addNodeLogicalSwitchPort(logicalSwitchName, portName, portType string, addresses []string, options, externalIDs map[string]string) error { + logicalSwitch := nbdb.LogicalSwitch{ + Name: logicalSwitchName, + } + + logicalSwitchPort := nbdb.LogicalSwitchPort{ + Name: portName, + Type: portType, + Options: options, + Addresses: addresses, + ExternalIDs: externalIDs, + } + if err := libovsdbops.CreateOrUpdateLogicalSwitchPortsOnSwitch(zic.nbClient, &logicalSwitch, &logicalSwitchPort); err != nil { + return fmt.Errorf("failed to add logical port %s to switch %s, error: %w", portName, logicalSwitch.Name, err) + } + return nil +} + +// cleanupNode cleansup the local zone node or remote zone node resources +func (zic *ZoneInterconnectHandler) cleanupNode(nodeName string) error { + klog.Infof("Cleaning up interconnect resources for the node %s for the network %s", nodeName, zic.GetNetworkName()) + + // Cleanup the logical router port in the cluster router for the node + // if it exists. + if err := zic.cleanupNodeClusterRouterPort(nodeName); err != nil { + return err + } + + // Cleanup the logical switch port in the transit switch for the node + // if it exists. + if err := zic.cleanupNodeTransitSwitchPort(nodeName); err != nil { + return err + } + + // Delete any static routes in the cluster router for this node + p := func(lrsr *nbdb.LogicalRouterStaticRoute) bool { + return lrsr.ExternalIDs["ic-node"] == nodeName + } + if err := libovsdbops.DeleteLogicalRouterStaticRoutesWithPredicate(zic.nbClient, zic.networkClusterRouterName, p); err != nil { + return fmt.Errorf("failed to cleanup static routes for the node %s: %w", nodeName, err) + } + + return nil +} + +func (zic *ZoneInterconnectHandler) cleanupNodeClusterRouterPort(nodeName string) error { + lrp := nbdb.LogicalRouterPort{ + Name: zic.getNetworkScopedName(types.RouterToTransitSwitchPrefix + nodeName), + } + logicalRouterPort, err := libovsdbops.GetLogicalRouterPort(zic.nbClient, &lrp) + if err != nil { + // logical router port doesn't exist. So nothing to cleanup. 
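// (Any lookup failure is treated here as "port already gone" and skipped rather than returned
// to the caller.)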
+ return nil + } + + logicalRouter := nbdb.LogicalRouter{ + Name: zic.networkClusterRouterName, + } + + if err := libovsdbops.DeleteLogicalRouterPorts(zic.nbClient, &logicalRouter, logicalRouterPort); err != nil { + return fmt.Errorf("failed to delete logical router port %s from router %s for the node %s, error: %w", logicalRouterPort.Name, zic.networkClusterRouterName, nodeName, err) + } + + return nil +} + +func (zic *ZoneInterconnectHandler) cleanupNodeTransitSwitchPort(nodeName string) error { + logicalSwitch := &nbdb.LogicalSwitch{ + Name: zic.networkTransitSwitchName, + } + logicalSwitchPort := &nbdb.LogicalSwitchPort{ + Name: zic.getNetworkScopedName(types.TransitSwitchToRouterPrefix + nodeName), + } + + if err := libovsdbops.DeleteLogicalSwitchPorts(zic.nbClient, logicalSwitch, logicalSwitchPort); err != nil { + return fmt.Errorf("failed to delete logical switch port %s from transit switch %s for the node %s, error: %w", logicalSwitchPort.Name, zic.networkTransitSwitchName, nodeName, err) + } + return nil +} + +func (zic *ZoneInterconnectHandler) setRemotePortBindingChassis(nodeName, portName, chassisId string) error { + remotePort := sbdb.PortBinding{ + LogicalPort: portName, + } + chassis := sbdb.Chassis{ + Hostname: nodeName, + Name: chassisId, + } + + if err := libovsdbops.UpdatePortBindingSetChassis(zic.sbClient, &remotePort, &chassis); err != nil { + return fmt.Errorf("failed to update chassis %s for remote port %s, error: %w", nodeName, portName, err) + } + + return nil +} + +// addRemoteNodeStaticRoutes adds static routes in ovn_cluster_router to reach the remote node via the +// remote node transit switch port. +// Eg. if node ovn-worker2 is a remote node +// ovn-worker2 - { node_subnet = 10.244.0.0/24, node id = 2, transit switch port ip = 168.254.0.2/16, join ip connecting to GR_ovn-worker = 100.64.0.2/16} +// Then the below static routes are added +// ip4.dst == 10.244.0.0/24 , nexthop = 168.254.0.2 +// ip4.dst == 100.64.0.2/16 , nexthop = 168.254.0.2 (only for default primary network) +func (zic *ZoneInterconnectHandler) addRemoteNodeStaticRoutes(node *corev1.Node, nodeTransitSwitchPortIPs []*net.IPNet) error { + addRoute := func(prefix, nexthop string) error { + logicalRouterStaticRoute := nbdb.LogicalRouterStaticRoute{ + ExternalIDs: map[string]string{ + "ic-node": node.Name, + }, + Nexthop: nexthop, + IPPrefix: prefix, + } + p := func(lrsr *nbdb.LogicalRouterStaticRoute) bool { + return lrsr.IPPrefix == prefix && + lrsr.Nexthop == nexthop && + lrsr.ExternalIDs["ic-node"] == node.Name + } + if err := libovsdbops.CreateOrReplaceLogicalRouterStaticRouteWithPredicate(zic.nbClient, zic.networkClusterRouterName, &logicalRouterStaticRoute, p); err != nil { + return fmt.Errorf("failed to create static route: %w", err) + } + return nil + } + + nodeSubnets, err := util.ParseNodeHostSubnetAnnotation(node, zic.GetNetworkName()) + if err != nil { + return fmt.Errorf("failed to parse node %s subnets annotation %w", node.Name, err) + } + + nodeSubnetStaticRoutes := zic.getStaticRoutes(nodeSubnets, nodeTransitSwitchPortIPs, false) + for _, staticRoute := range nodeSubnetStaticRoutes { + // Possible optimization: Add all the routes in one transaction + if err := addRoute(staticRoute.prefix, staticRoute.nexthop); err != nil { + return fmt.Errorf("error adding static route %s - %s to the router %s : %w", staticRoute.prefix, staticRoute.nexthop, zic.networkClusterRouterName, err) + } + } + + if zic.IsSecondary() { + // Secondary network cluster router doesn't connect to a join switch + 
// or to a Gateway router. + return nil + } + + nodeGRPIPs, err := util.ParseNodeGatewayRouterLRPAddrs(node) + if err != nil { + return fmt.Errorf("failed to parse node %s Gateway router LRP Addrs annotation %w", node.Name, err) + } + + nodeGRPIPStaticRoutes := zic.getStaticRoutes(nodeGRPIPs, nodeTransitSwitchPortIPs, true) + for _, staticRoute := range nodeGRPIPStaticRoutes { + // Possible optimization: Add all the routes in one transaction + if err := addRoute(staticRoute.prefix, staticRoute.nexthop); err != nil { + return fmt.Errorf("error adding static route %s - %s to the router %s : %w", staticRoute.prefix, staticRoute.nexthop, zic.networkClusterRouterName, err) + } + } + + return nil +} + +// deleteLocalNodeStaticRoutes deletes the static routes added by the function addRemoteNodeStaticRoutes +func (zic *ZoneInterconnectHandler) deleteLocalNodeStaticRoutes(node *corev1.Node, nodeID int, nodeTransitSwitchPortIPs []*net.IPNet) error { + deleteRoute := func(prefix, nexthop string) error { + p := func(lrsr *nbdb.LogicalRouterStaticRoute) bool { + return lrsr.IPPrefix == prefix && + lrsr.Nexthop == nexthop && + lrsr.ExternalIDs["ic-node"] == node.Name + } + if err := libovsdbops.DeleteLogicalRouterStaticRoutesWithPredicate(zic.nbClient, zic.networkClusterRouterName, p); err != nil { + return fmt.Errorf("failed to delete static route: %w", err) + } + return nil + } + + nodeSubnets, err := util.ParseNodeHostSubnetAnnotation(node, zic.GetNetworkName()) + if err != nil { + return fmt.Errorf("failed to parse node %s subnets annotation %w", node.Name, err) + } + + nodeSubnetStaticRoutes := zic.getStaticRoutes(nodeSubnets, nodeTransitSwitchPortIPs, false) + for _, staticRoute := range nodeSubnetStaticRoutes { + // Possible optimization: Add all the routes in one transaction + if err := deleteRoute(staticRoute.prefix, staticRoute.nexthop); err != nil { + return fmt.Errorf("error deleting static route %s - %s from the router %s : %w", staticRoute.prefix, staticRoute.nexthop, zic.networkClusterRouterName, err) + } + } + + if zic.IsSecondary() { + // Secondary network cluster router doesn't connect to a join switch + // or to a Gateway router. + return nil + } + + // Clear the routes connecting to the GW Router for the default network + nodeGRPIPs, err := util.ParseNodeGatewayRouterLRPAddrs(node) + if err != nil { + return fmt.Errorf("failed to parse node %s Gateway router LRP Addrs annotation %w", node.Name, err) + } + + nodenodeGRPIPStaticRoutes := zic.getStaticRoutes(nodeGRPIPs, nodeTransitSwitchPortIPs, true) + for _, staticRoute := range nodenodeGRPIPStaticRoutes { + // Possible optimization: Add all the routes in one transaction + if err := deleteRoute(staticRoute.prefix, staticRoute.nexthop); err != nil { + return fmt.Errorf("error deleting static route %s - %s from the router %s : %w", staticRoute.prefix, staticRoute.nexthop, zic.networkClusterRouterName, err) + } + } + + return nil +} + +// getNetworkScopedName returns the network scoped name. +// Note: For default primary network, zic.GetPrefix() will return "" +// and for secondary networks it will return "_" +func (zic *ZoneInterconnectHandler) getNetworkScopedName(name string) string { + return fmt.Sprintf("%s%s", zic.GetPrefix(), name) +} + +// interconnectStaticRoute represents a static route +type interconnectStaticRoute struct { + prefix string + nexthop string +} + +// getStaticRoutes returns a list of static routes from the provided ipPrefix'es and nexthops +// Eg. 
If ipPrefixes - [10.0.0.4/24, aef0::4/64] and nexthops - [168.254.0.4/16, bef0::4/64] and fullMask is true +// +// It will return [interconnectStaticRoute { prefix : 10.0.0.4/32, nexthop : 168.254.0.4}, +// - interconnectStaticRoute { prefix : aef0::4/128, nexthop : bef0::4}} +// +// If fullMask is false, it will return +// [interconnectStaticRoute { prefix : 10.0.0.4/24, nexthop : 168.254.0.4}, +// - interconnectStaticRoute { prefix : aef0::4/64, nexthop : bef0::4}} +func (zic *ZoneInterconnectHandler) getStaticRoutes(ipPrefixes []*net.IPNet, nexthops []*net.IPNet, fullMask bool) []*interconnectStaticRoute { + var staticRoutes []*interconnectStaticRoute + + for _, prefix := range ipPrefixes { + for _, nexthop := range nexthops { + if utilnet.IPFamilyOfCIDR(prefix) != utilnet.IPFamilyOfCIDR(nexthop) { + continue + } + p := "" + if fullMask { + p = prefix.IP.String() + util.GetIPFullMask(prefix.IP.String()) + } else { + p = prefix.String() + } + + staticRoute := &interconnectStaticRoute{ + prefix: p, + nexthop: nexthop.IP.String(), + } + staticRoutes = append(staticRoutes, staticRoute) + } + } + + return staticRoutes +} diff --git a/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler_test.go b/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler_test.go new file mode 100644 index 0000000000..2b8a3e6931 --- /dev/null +++ b/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler_test.go @@ -0,0 +1,819 @@ +package zoneinterconnect + +import ( + "context" + "fmt" + "sort" + + "github.com/onsi/ginkgo" + "github.com/onsi/gomega" + + cnitypes "github.com/containernetworking/cni/pkg/types" + "github.com/urfave/cli/v2" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + libovsdbclient "github.com/ovn-org/libovsdb/client" + ovncnitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdbops" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/sbdb" + libovsdbtest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/libovsdb" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" +) + +const ( + // ovnNodeIDAnnotaton is the node annotation name used to store the node id. + ovnNodeIDAnnotaton = "k8s.ovn.org/node-id" + + // ovnNodeGRLRPAddrAnnotaton is the node annotation name used to store the node gateway router port ips. + ovnNodeGRLRPAddrAnnotaton = "k8s.ovn.org/node-gateway-router-lrp-ifaddr" + + // ovnTransitSwitchPortAddrAnnotation is the node annotation name to store the transit switch port ips. + ovnTransitSwitchPortAddrAnnotation = "k8s.ovn.org/node-transit-switch-port-ifaddr" + + // ovnNodeZoneNameAnnotation is the node annotation name to store the node zone name. + ovnNodeZoneNameAnnotation = "k8s.ovn.org/zone-name" + + // ovnNodeChassisIDAnnotatin is the node annotation name to store the node chassis id. + ovnNodeChassisIDAnnotatin = "k8s.ovn.org/node-chassis-id" + + // ovnNodeSubnetsAnnotation is the node annotation name to store the node subnets. 
+ ovnNodeSubnetsAnnotation = "k8s.ovn.org/node-subnets" +) + +func newClusterJoinSwitch() *nbdb.LogicalSwitch { + return &nbdb.LogicalSwitch{ + UUID: types.OVNJoinSwitch + "-UUID", + Name: types.OVNJoinSwitch, + } +} + +func newOVNClusterRouter(netName string) *nbdb.LogicalRouter { + return &nbdb.LogicalRouter{ + UUID: getNetworkScopedName(netName, types.OVNClusterRouter) + "-UUID", + Name: getNetworkScopedName(netName, types.OVNClusterRouter), + } +} + +func createTransitSwitchPortBindings(sbClient libovsdbclient.Client, netName string, nodes ...*corev1.Node) error { + for _, node := range nodes { + pb := &sbdb.PortBinding{ + LogicalPort: getNetworkScopedName(netName, types.TransitSwitchToRouterPrefix+node.Name), + } + + ops, err := sbClient.Create(pb) + if err != nil { + return err + } + _, err = sbClient.Transact(context.Background(), ops...) + if err != nil { + return err + } + } + + return nil +} + +func getNetworkScopedName(netName, name string) string { + if netName == types.DefaultNetworkName { + return fmt.Sprintf("%s", name) + } + return fmt.Sprintf("%s%s", util.GetSecondaryNetworkPrefix(netName), name) +} + +func invokeICHandlerAddNodeFunction(zone string, icHandler *ZoneInterconnectHandler, nodes ...*corev1.Node) error { + for _, node := range nodes { + if util.GetNodeZone(node) == zone { + err := icHandler.AddLocalZoneNode(node) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + } else { + err := icHandler.AddRemoteZoneNode(node) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + } + } + + return nil +} + +func checkInterconnectResources(zone string, netName string, nbClient libovsdbclient.Client, testNodesRouteInfo map[string]map[string]string, nodes ...*corev1.Node) error { + localZoneNodes := []*corev1.Node{} + remoteZoneNodes := []*corev1.Node{} + localZoneNodeNames := []string{} + remoteZoneNodeNames := []string{} + for _, node := range nodes { + nodeZone := util.GetNodeZone(node) + if nodeZone == zone { + localZoneNodes = append(localZoneNodes, node) + localZoneNodeNames = append(localZoneNodeNames, node.Name) + } else { + remoteZoneNodes = append(remoteZoneNodes, node) + remoteZoneNodeNames = append(remoteZoneNodeNames, node.Name) + } + + } + + sort.Strings(localZoneNodeNames) + sort.Strings(remoteZoneNodeNames) + // First check if transit switch exists or not + s := nbdb.LogicalSwitch{ + Name: getNetworkScopedName(netName, types.TransitSwitch), + } + + ts, err := libovsdbops.GetLogicalSwitch(nbClient, &s) + + if err != nil { + return fmt.Errorf("could not find transit switch %s in the nb db for network %s : err - %v", s.Name, netName, err) + } + + noOfTSPorts := len(localZoneNodes) + len(remoteZoneNodes) + + if len(ts.Ports) != noOfTSPorts { + return fmt.Errorf("transit switch %s doesn't have expected logical ports. Found %d : Expected %d ports", + getNetworkScopedName(netName, types.TransitSwitch), len(ts.Ports), noOfTSPorts) + } + // Checking just to be sure that the returned switch is infact transit switch. + if ts.Name != getNetworkScopedName(netName, types.TransitSwitch) { + return fmt.Errorf("transit switch %s not found in NB DB. 
Instead found %s", getNetworkScopedName(netName, types.TransitSwitch), ts.Name) + } + + tsPorts := make([]string, noOfTSPorts) + i := 0 + for _, p := range ts.Ports { + lp := nbdb.LogicalSwitchPort{ + UUID: p, + } + + lsp, err := libovsdbops.GetLogicalSwitchPort(nbClient, &lp) + if err != nil { + return fmt.Errorf("could not find logical switch port with uuid %s in the nb db for network %s : err - %v", p, netName, err) + } + tsPorts[i] = lsp.Name + ":" + lsp.Type + i++ + } + + sort.Strings(tsPorts) + + // Verify Transit switch ports. + // For local nodes, the transit switch port should be of type 'router' + // and for remote zone nodes, it should be of type 'remote'. + expectedTsPorts := make([]string, noOfTSPorts) + i = 0 + for _, node := range localZoneNodes { + // The logical port for the local zone nodes should be of type patch. + nodeTSPortName := getNetworkScopedName(netName, types.TransitSwitchToRouterPrefix+node.Name) + expectedTsPorts[i] = nodeTSPortName + ":router" + i++ + } + + for _, node := range remoteZoneNodes { + // The logical port for the local zone nodes should be of type patch. + nodeTSPortName := getNetworkScopedName(netName, types.TransitSwitchToRouterPrefix+node.Name) + expectedTsPorts[i] = nodeTSPortName + ":remote" + i++ + } + + sort.Strings(expectedTsPorts) + gomega.Expect(tsPorts).To(gomega.Equal(expectedTsPorts)) + + r := nbdb.LogicalRouter{ + Name: getNetworkScopedName(netName, types.OVNClusterRouter), + } + + clusterRouter, err := libovsdbops.GetLogicalRouter(nbClient, &r) + if err != nil { + return fmt.Errorf("could not find cluster router %s in the nb db for network %s : err - %v", r.Name, netName, err) + } + + // Verify that the OVN cluster router ports for each local node + // connects to the Transit switch. + icClusterRouterPorts := []string{} + lrpPrefixName := getNetworkScopedName(netName, types.RouterToTransitSwitchPrefix) + for _, p := range clusterRouter.Ports { + lp := nbdb.LogicalRouterPort{ + UUID: p, + } + + lrp, err := libovsdbops.GetLogicalRouterPort(nbClient, &lp) + if err != nil { + return fmt.Errorf("could not find logical router port with uuid %s in the nb db for network %s : err - %v", p, netName, err) + } + + if lrp.Name[:len(lrpPrefixName)] == lrpPrefixName { + icClusterRouterPorts = append(icClusterRouterPorts, lrp.Name) + } + } + + sort.Strings(icClusterRouterPorts) + + expectedICClusterRouterPorts := []string{} + for _, node := range localZoneNodes { + expectedICClusterRouterPorts = append(expectedICClusterRouterPorts, getNetworkScopedName(netName, types.RouterToTransitSwitchPrefix+node.Name)) + } + sort.Strings(expectedICClusterRouterPorts) + + gomega.Expect(icClusterRouterPorts).To(gomega.Equal(expectedICClusterRouterPorts)) + + // Verify the static routes + expectedStaticRoutes := []string{} + + for _, node := range remoteZoneNodeNames { + nodeRouteInfo := testNodesRouteInfo[node] + expectedStaticRoutes = append(expectedStaticRoutes, nodeRouteInfo["node-subnets"]+"-"+nodeRouteInfo["ts-ip"]) + if netName == types.DefaultNetworkName { + expectedStaticRoutes = append(expectedStaticRoutes, nodeRouteInfo["host-route"]+"-"+nodeRouteInfo["ts-ip"]) + } + } + sort.Strings(expectedStaticRoutes) + + clusterRouterStaticRoutes := []string{} + for _, srUUID := range clusterRouter.StaticRoutes { + newPredicate := func(item *nbdb.LogicalRouterStaticRoute) bool { + return item.UUID == srUUID + } + sr, err := libovsdbops.FindLogicalRouterStaticRoutesWithPredicate(nbClient, newPredicate) + if err != nil { + return err + } + + 
clusterRouterStaticRoutes = append(clusterRouterStaticRoutes, sr[0].IPPrefix+"-"+sr[0].Nexthop) + } + sort.Strings(clusterRouterStaticRoutes) + gomega.Expect(clusterRouterStaticRoutes).To(gomega.Equal(expectedStaticRoutes)) + + return nil +} + +var _ = ginkgo.Describe("Zone Interconnect Operations", func() { + var ( + app *cli.App + libovsdbCleanup *libovsdbtest.Cleanup + testNode1 corev1.Node + testNode2 corev1.Node + testNode3 corev1.Node + node1Chassis sbdb.Chassis + node2Chassis sbdb.Chassis + node3Chassis sbdb.Chassis + initialNBDB []libovsdbtest.TestData + initialSBDB []libovsdbtest.TestData + testNodesRouteInfo map[string]map[string]string + ) + + const ( + clusterIPNet string = "10.1.0.0" + clusterCIDR string = clusterIPNet + "/16" + joinSubnetCIDR string = "100.64.0.0/16/19" + vlanID = 1024 + ) + + ginkgo.BeforeEach(func() { + // Restore global default values before each testcase + //gomega.Expect(config.PrepareTestConfig()).To(gomega.Succeed()) + + app = cli.NewApp() + app.Name = "test" + app.Flags = config.Flags + libovsdbCleanup = nil + + node1Chassis = sbdb.Chassis{Name: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac6", Hostname: "node1", UUID: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac6"} + node2Chassis = sbdb.Chassis{Name: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac7", Hostname: "node2", UUID: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac7"} + node3Chassis = sbdb.Chassis{Name: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac8", Hostname: "node3", UUID: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac8"} + + }) + + ginkgo.AfterEach(func() { + if libovsdbCleanup != nil { + libovsdbCleanup.Cleanup() + } + }) + + ginkgo.Context("Default network", func() { + ginkgo.BeforeEach(func() { + // node1 is a local zone node + testNode1 = corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + Annotations: map[string]string{ + ovnNodeChassisIDAnnotatin: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac6", + ovnNodeZoneNameAnnotation: "global", + ovnNodeIDAnnotaton: "2", + ovnNodeSubnetsAnnotation: "{\"default\":[\"10.244.2.0/24\"]}", + ovnTransitSwitchPortAddrAnnotation: "{\"ipv4\":\"168.254.0.2/16\"}", + ovnNodeGRLRPAddrAnnotaton: "{\"ipv4\":\"100.64.0.2/16\"}", + }, + }, + Status: corev1.NodeStatus{ + Addresses: []corev1.NodeAddress{{Type: corev1.NodeInternalIP, Address: "10.0.0.10"}}, + }, + } + // node2 is a local zone node + testNode2 = corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node2", + Annotations: map[string]string{ + ovnNodeChassisIDAnnotatin: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac7", + ovnNodeZoneNameAnnotation: "global", + ovnNodeIDAnnotaton: "3", + ovnNodeSubnetsAnnotation: "{\"default\":[\"10.244.3.0/24\"]}", + ovnTransitSwitchPortAddrAnnotation: "{\"ipv4\":\"168.254.0.3/16\"}", + ovnNodeGRLRPAddrAnnotaton: "{\"ipv4\":\"100.64.0.3/16\"}", + }, + }, + Status: corev1.NodeStatus{ + Addresses: []corev1.NodeAddress{{Type: corev1.NodeInternalIP, Address: "10.0.0.11"}}, + }, + } + // node3 is a remote zone node + testNode3 = corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node3", + Annotations: map[string]string{ + ovnNodeChassisIDAnnotatin: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac8", + ovnNodeZoneNameAnnotation: "foo", + ovnNodeIDAnnotaton: "4", + ovnNodeSubnetsAnnotation: "{\"default\":[\"10.244.4.0/24\"]}", + ovnTransitSwitchPortAddrAnnotation: "{\"ipv4\":\"168.254.0.4/16\"}", + ovnNodeGRLRPAddrAnnotaton: "{\"ipv4\":\"100.64.0.4/16\"}", + }, + }, + Status: corev1.NodeStatus{ + Addresses: []corev1.NodeAddress{{Type: corev1.NodeInternalIP, Address: "10.0.0.12"}}, + }, + } + + testNodesRouteInfo = map[string]map[string]string{ 
+ "node1": {"node-subnets": "10.244.2.0/24", "ts-ip": "168.254.0.2", "host-route": "100.64.0.2/32"}, + "node2": {"node-subnets": "10.244.3.0/24", "ts-ip": "168.254.0.3", "host-route": "100.64.0.3/32"}, + "node3": {"node-subnets": "10.244.4.0/24", "ts-ip": "168.254.0.4", "host-route": "100.64.0.4/32"}, + } + initialNBDB = []libovsdbtest.TestData{ + newClusterJoinSwitch(), + newOVNClusterRouter(types.DefaultNetworkName), + } + + initialSBDB = []libovsdbtest.TestData{ + &node1Chassis, &node2Chassis, &node3Chassis} + }) + + ginkgo.It("Basic checks", func() { + app.Action = func(ctx *cli.Context) error { + dbSetup := libovsdbtest.TestSetup{ + NBData: initialNBDB, + SBData: initialSBDB, + } + + _, err := config.InitConfig(ctx, nil, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + config.Kubernetes.HostNetworkNamespace = "" + + var libovsdbOvnNBClient, libovsdbOvnSBClient libovsdbclient.Client + libovsdbOvnNBClient, libovsdbOvnSBClient, libovsdbCleanup, err = libovsdbtest.NewNBSBTestHarness(dbSetup) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + err = createTransitSwitchPortBindings(libovsdbOvnSBClient, types.DefaultNetworkName, &testNode1, &testNode2, &testNode2) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + zoneICHandler := NewZoneInterconnectHandler(&util.DefaultNetInfo{}, libovsdbOvnNBClient, libovsdbOvnSBClient) + err = invokeICHandlerAddNodeFunction("global", zoneICHandler, &testNode1, &testNode2, &testNode3) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = checkInterconnectResources("global", types.DefaultNetworkName, libovsdbOvnNBClient, testNodesRouteInfo, &testNode1, &testNode2, &testNode3) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return nil + } + + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR, + "-init-cluster-manager", + "-zone-join-switch-subnets=" + joinSubnetCIDR, + "-enable-interconnect", + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("Change node zones", func() { + app.Action = func(ctx *cli.Context) error { + dbSetup := libovsdbtest.TestSetup{ + NBData: initialNBDB, + SBData: initialSBDB, + } + + _, err := config.InitConfig(ctx, nil, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + config.Kubernetes.HostNetworkNamespace = "" + + var libovsdbOvnNBClient, libovsdbOvnSBClient libovsdbclient.Client + libovsdbOvnNBClient, libovsdbOvnSBClient, libovsdbCleanup, err = libovsdbtest.NewNBSBTestHarness(dbSetup) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + err = createTransitSwitchPortBindings(libovsdbOvnSBClient, types.DefaultNetworkName, &testNode1, &testNode2, &testNode2) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + zoneICHandler := NewZoneInterconnectHandler(&util.DefaultNetInfo{}, libovsdbOvnNBClient, libovsdbOvnSBClient) + err = invokeICHandlerAddNodeFunction("global", zoneICHandler, &testNode1, &testNode2, &testNode3) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = checkInterconnectResources("global", types.DefaultNetworkName, libovsdbOvnNBClient, testNodesRouteInfo, &testNode1, &testNode2, &testNode3) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Change the zone of node2 to a remote zone + testNode2.Annotations[ovnNodeZoneNameAnnotation] = "bar" + err = invokeICHandlerAddNodeFunction("global", zoneICHandler, &testNode1, &testNode2, &testNode3) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = checkInterconnectResources("global", types.DefaultNetworkName, libovsdbOvnNBClient, testNodesRouteInfo, &testNode1, &testNode2, &testNode3) + 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Change the zone of node2 and node3 to global (no remote zone nodes) + err = invokeICHandlerAddNodeFunction("global", zoneICHandler, &testNode1, &testNode2, &testNode3) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = checkInterconnectResources("global", types.DefaultNetworkName, libovsdbOvnNBClient, testNodesRouteInfo, &testNode1, &testNode2, &testNode3) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + return nil + } + + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR, + "-init-cluster-manager", + "-zone-join-switch-subnets=" + joinSubnetCIDR, + "-enable-interconnect", + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("Sync nodes", func() { + app.Action = func(ctx *cli.Context) error { + dbSetup := libovsdbtest.TestSetup{ + NBData: initialNBDB, + SBData: initialSBDB, + } + + _, err := config.InitConfig(ctx, nil, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + config.Kubernetes.HostNetworkNamespace = "" + + var libovsdbOvnNBClient, libovsdbOvnSBClient libovsdbclient.Client + libovsdbOvnNBClient, libovsdbOvnSBClient, libovsdbCleanup, err = libovsdbtest.NewNBSBTestHarness(dbSetup) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + err = createTransitSwitchPortBindings(libovsdbOvnSBClient, types.DefaultNetworkName, &testNode1, &testNode2, &testNode2) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + zoneICHandler := NewZoneInterconnectHandler(&util.DefaultNetInfo{}, libovsdbOvnNBClient, libovsdbOvnSBClient) + err = invokeICHandlerAddNodeFunction("global", zoneICHandler, &testNode1, &testNode2, &testNode3) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = checkInterconnectResources("global", types.DefaultNetworkName, libovsdbOvnNBClient, testNodesRouteInfo, &testNode1, &testNode2, &testNode3) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Call ICHandler SyncNodes function removing the testNode3 from the list of nodes + var kNodes []interface{} + kNodes = append(kNodes, &testNode1) + kNodes = append(kNodes, &testNode2) + err = zoneICHandler.SyncNodes(kNodes) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = checkInterconnectResources("global", types.DefaultNetworkName, libovsdbOvnNBClient, testNodesRouteInfo, &testNode1, &testNode2) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + return nil + } + + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR, + "-init-cluster-manager", + "-zone-join-switch-subnets=" + joinSubnetCIDR, + "-enable-interconnect", + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) + + ginkgo.Context("Secondary networks", func() { + ginkgo.BeforeEach(func() { + testNode1 = corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + Annotations: map[string]string{ + ovnNodeChassisIDAnnotatin: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac6", + ovnNodeZoneNameAnnotation: "global", + ovnNodeIDAnnotaton: "2", + ovnNodeSubnetsAnnotation: "{\"blue\":[\"10.244.2.0/24\"]}", + ovnTransitSwitchPortAddrAnnotation: "{\"ipv4\":\"168.254.0.2/16\"}", + ovnNodeGRLRPAddrAnnotaton: "{\"ipv4\":\"100.64.0.2/16\"}", + }, + }, + Status: corev1.NodeStatus{ + Addresses: []corev1.NodeAddress{{Type: corev1.NodeInternalIP, Address: "10.0.0.10"}}, + }, + } + // node2 is a local zone node + testNode2 = corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node2", + Annotations: map[string]string{ + ovnNodeChassisIDAnnotatin: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac7", + ovnNodeZoneNameAnnotation: "global", + 
ovnNodeIDAnnotaton: "3", + ovnNodeSubnetsAnnotation: "{\"blue\":[\"10.244.3.0/24\"]}", + ovnTransitSwitchPortAddrAnnotation: "{\"ipv4\":\"168.254.0.3/16\"}", + ovnNodeGRLRPAddrAnnotaton: "{\"ipv4\":\"100.64.0.3/16\"}", + }, + }, + Status: corev1.NodeStatus{ + Addresses: []corev1.NodeAddress{{Type: corev1.NodeInternalIP, Address: "10.0.0.11"}}, + }, + } + // node3 is a remote zone node + testNode3 = corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node3", + Annotations: map[string]string{ + ovnNodeChassisIDAnnotatin: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac8", + ovnNodeZoneNameAnnotation: "foo", + ovnNodeIDAnnotaton: "4", + ovnNodeSubnetsAnnotation: "{\"blue\":[\"10.244.4.0/24\"]}", + ovnTransitSwitchPortAddrAnnotation: "{\"ipv4\":\"168.254.0.4/16\"}", + ovnNodeGRLRPAddrAnnotaton: "{\"ipv4\":\"100.64.0.4/16\"}", + }, + }, + Status: corev1.NodeStatus{ + Addresses: []corev1.NodeAddress{{Type: corev1.NodeInternalIP, Address: "10.0.0.12"}}, + }, + } + testNodesRouteInfo = map[string]map[string]string{ + "node1": {"node-subnets": "10.244.2.0/24", "ts-ip": "168.254.0.2", "host-route": "100.64.0.2/32"}, + "node2": {"node-subnets": "10.244.3.0/24", "ts-ip": "168.254.0.3", "host-route": "100.64.0.3/32"}, + "node3": {"node-subnets": "10.244.4.0/24", "ts-ip": "168.254.0.4", "host-route": "100.64.0.4/32"}, + } + initialNBDB = []libovsdbtest.TestData{ + newOVNClusterRouter("blue"), + } + + initialSBDB = []libovsdbtest.TestData{ + &node1Chassis, &node2Chassis, &node3Chassis} + }) + + ginkgo.It("Basic checks", func() { + app.Action = func(ctx *cli.Context) error { + dbSetup := libovsdbtest.TestSetup{ + NBData: initialNBDB, + SBData: initialSBDB, + } + + _, err := config.InitConfig(ctx, nil, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + config.Kubernetes.HostNetworkNamespace = "" + + var libovsdbOvnNBClient, libovsdbOvnSBClient libovsdbclient.Client + libovsdbOvnNBClient, libovsdbOvnSBClient, libovsdbCleanup, err = libovsdbtest.NewNBSBTestHarness(dbSetup) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + err = createTransitSwitchPortBindings(libovsdbOvnSBClient, types.DefaultNetworkName, &testNode1, &testNode2, &testNode2) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + netInfo := util.NewNetInfo(&ovncnitypes.NetConf{NetConf: cnitypes.NetConf{Name: "blue"}, Topology: types.Layer3Topology}) + zoneICHandler := NewZoneInterconnectHandler(netInfo, libovsdbOvnNBClient, libovsdbOvnSBClient) + err = invokeICHandlerAddNodeFunction("global", zoneICHandler, &testNode1, &testNode2, &testNode3) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = checkInterconnectResources("global", "blue", libovsdbOvnNBClient, testNodesRouteInfo, &testNode1, &testNode2, &testNode3) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return nil + } + + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR, + "-init-cluster-manager", + "-zone-join-switch-subnets=" + joinSubnetCIDR, + "-enable-interconnect", + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("Sync nodes", func() { + app.Action = func(ctx *cli.Context) error { + dbSetup := libovsdbtest.TestSetup{ + NBData: initialNBDB, + SBData: initialSBDB, + } + + _, err := config.InitConfig(ctx, nil, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + config.Kubernetes.HostNetworkNamespace = "" + + var libovsdbOvnNBClient, libovsdbOvnSBClient libovsdbclient.Client + libovsdbOvnNBClient, libovsdbOvnSBClient, libovsdbCleanup, err = libovsdbtest.NewNBSBTestHarness(dbSetup) + 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + err = createTransitSwitchPortBindings(libovsdbOvnSBClient, types.DefaultNetworkName, &testNode1, &testNode2, &testNode2) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + netInfo := util.NewNetInfo(&ovncnitypes.NetConf{NetConf: cnitypes.NetConf{Name: "blue"}, Topology: types.Layer3Topology}) + zoneICHandler := NewZoneInterconnectHandler(netInfo, libovsdbOvnNBClient, libovsdbOvnSBClient) + err = invokeICHandlerAddNodeFunction("global", zoneICHandler, &testNode1, &testNode2, &testNode3) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = checkInterconnectResources("global", "blue", libovsdbOvnNBClient, testNodesRouteInfo, &testNode1, &testNode2, &testNode3) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Call ICHandler SyncNodes function removing the testNode3 from the list of nodes + var kNodes []interface{} + kNodes = append(kNodes, &testNode1) + kNodes = append(kNodes, &testNode2) + err = zoneICHandler.SyncNodes(kNodes) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = checkInterconnectResources("global", "blue", libovsdbOvnNBClient, testNodesRouteInfo, &testNode1, &testNode2) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return nil + } + + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR, + "-init-cluster-manager", + "-zone-join-switch-subnets=" + joinSubnetCIDR, + "-enable-interconnect", + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) + + ginkgo.Context("Error scenarios", func() { + ginkgo.It("Missing annotations and error scenarios for local node", func() { + app.Action = func(ctx *cli.Context) error { + dbSetup := libovsdbtest.TestSetup{ + NBData: initialNBDB, + SBData: initialSBDB, + } + + _, err := config.InitConfig(ctx, nil, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + config.Kubernetes.HostNetworkNamespace = "" + + var libovsdbOvnNBClient, libovsdbOvnSBClient libovsdbclient.Client + libovsdbOvnNBClient, libovsdbOvnSBClient, libovsdbCleanup, err = libovsdbtest.NewNBSBTestHarness(dbSetup) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + testNode4 := corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node4", + }, + Status: corev1.NodeStatus{ + Addresses: []corev1.NodeAddress{{Type: corev1.NodeInternalIP, Address: "10.0.0.10"}}, + }, + } + + err = createTransitSwitchPortBindings(libovsdbOvnSBClient, types.DefaultNetworkName, &testNode1, &testNode2, &testNode2) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + zoneICHandler := NewZoneInterconnectHandler(&util.DefaultNetInfo{}, libovsdbOvnNBClient, libovsdbOvnSBClient) + gomega.Expect(zoneICHandler).NotTo(gomega.BeNil()) + + err = zoneICHandler.AddLocalZoneNode(&testNode4) + gomega.Expect(err).To(gomega.HaveOccurred(), "failed to get node id for node - node4") + + // Set the node id + testNode4.Annotations = map[string]string{ovnNodeIDAnnotaton: "5"} + err = zoneICHandler.AddLocalZoneNode(&testNode4) + gomega.Expect(err).To(gomega.HaveOccurred(), "failed to get the node transit switch port ips for node node4") + + // Set the node transit switch port ips + testNode4.Annotations[ovnTransitSwitchPortAddrAnnotation] = "{\"ipv4\":\"168.254.0.5/16\"}" + err = zoneICHandler.AddLocalZoneNode(&testNode4) + gomega.Expect(err).To(gomega.HaveOccurred(), "failed to create/update cluster router ovn_cluster_router to add transit switch port rtots-node4 for the node node4") + + // Create the cluster router + r := newOVNClusterRouter(types.DefaultNetworkName) + err = 
libovsdbops.CreateOrUpdateLogicalRouter(libovsdbOvnNBClient, r) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + err = zoneICHandler.AddLocalZoneNode(&testNode4) + gomega.Expect(err).To(gomega.HaveOccurred(), "failed to parse node node4 subnets annotation") + + // Set node subnet annotation + testNode4.Annotations[ovnNodeSubnetsAnnotation] = "{\"default\":[\"10.244.5.0/24\"]}" + + err = zoneICHandler.AddLocalZoneNode(&testNode4) + gomega.Expect(err).To(gomega.HaveOccurred(), "failed to parse node node4 GR IPs annotation") + + // Set node ovn-gw-router-port-ips annotation + testNode4.Annotations[ovnNodeGRLRPAddrAnnotaton] = "{\"ipv4\":\"100.64.0.5/16\"}" + err = zoneICHandler.AddLocalZoneNode(&testNode4) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + testNodesRouteInfo = map[string]map[string]string{ + "node4": {"node-subnets": "10.244.5.0/24", "ts-ip": "168.254.0.5", "host-route": "100.64.0.5/32"}, + } + err = checkInterconnectResources("global", types.DefaultNetworkName, libovsdbOvnNBClient, testNodesRouteInfo, &testNode4) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return nil + } + + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR, + "-init-cluster-manager", + "-zone-join-switch-subnets=" + joinSubnetCIDR, + "-enable-interconnect", + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("Missing annotations and error scenarios for remote node", func() { + app.Action = func(ctx *cli.Context) error { + dbSetup := libovsdbtest.TestSetup{ + NBData: initialNBDB, + SBData: initialSBDB, + } + + _, err := config.InitConfig(ctx, nil, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + config.Kubernetes.HostNetworkNamespace = "" + + var libovsdbOvnNBClient, libovsdbOvnSBClient libovsdbclient.Client + libovsdbOvnNBClient, libovsdbOvnSBClient, libovsdbCleanup, err = libovsdbtest.NewNBSBTestHarness(dbSetup) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + testNode4 := corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node4", + Annotations: map[string]string{ + ovnNodeZoneNameAnnotation: "foo", + }, + }, + Status: corev1.NodeStatus{ + Addresses: []corev1.NodeAddress{{Type: corev1.NodeInternalIP, Address: "10.0.0.10"}}, + }, + } + + err = createTransitSwitchPortBindings(libovsdbOvnSBClient, types.DefaultNetworkName, &testNode1, &testNode2, &testNode2) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + zoneICHandler := NewZoneInterconnectHandler(&util.DefaultNetInfo{}, libovsdbOvnNBClient, libovsdbOvnSBClient) + gomega.Expect(zoneICHandler).NotTo(gomega.BeNil()) + + err = zoneICHandler.AddRemoteZoneNode(&testNode4) + gomega.Expect(err).To(gomega.HaveOccurred(), "failed to get node id for node - node4") + + // Set the node id + testNode4.Annotations[ovnNodeIDAnnotaton] = "5" + err = zoneICHandler.AddRemoteZoneNode(&testNode4) + gomega.Expect(err).To(gomega.HaveOccurred(), "failed to parse node chassis-id for node") + + // Set the node-chassis-id + testNode4.Annotations[ovnNodeChassisIDAnnotatin] = "cb9ec8fa-b409-4ef3-9f42-d9283c47aac9" + err = zoneICHandler.AddRemoteZoneNode(&testNode4) + gomega.Expect(err).To(gomega.HaveOccurred(), "failed to get the node transit switch port ips for node node4") + + // Set the node transit switch port ips + testNode4.Annotations[ovnTransitSwitchPortAddrAnnotation] = "{\"ipv4\":\"168.254.0.5/16\"}" + err = zoneICHandler.AddRemoteZoneNode(&testNode4) + gomega.Expect(err).To(gomega.HaveOccurred(), "failed to update chassis node4 for remote port tstor-node4") + + // Create remote chassis + 
node4Chassis := &sbdb.Chassis{Name: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac9", Hostname: "node4", UUID: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac9"} + encap := &sbdb.Encap{ChassisName: node4Chassis.Name, IP: "10.0.0.12"} + err = libovsdbops.CreateOrUpdateChassis(libovsdbOvnSBClient, node4Chassis, encap) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + err = zoneICHandler.AddRemoteZoneNode(&testNode4) + gomega.Expect(err).To(gomega.HaveOccurred(), "failed to parse node node4 subnets annotation") + + // Set node subnet annotation + testNode4.Annotations[ovnNodeSubnetsAnnotation] = "{\"default\":[\"10.244.5.0/24\"]}" + err = zoneICHandler.AddRemoteZoneNode(&testNode4) + gomega.Expect(err).To(gomega.HaveOccurred(), "unable to create static routes") + + // Create the cluster router + r := newOVNClusterRouter(types.DefaultNetworkName) + err = libovsdbops.CreateOrUpdateLogicalRouter(libovsdbOvnNBClient, r) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = zoneICHandler.AddRemoteZoneNode(&testNode4) + gomega.Expect(err).To(gomega.HaveOccurred(), "failed to parse node node4 GR IPs annotation") + + // Set node ovn-gw-router-port-ips annotation + testNode4.Annotations[ovnNodeGRLRPAddrAnnotaton] = "{\"ipv4\":\"100.64.0.5/16\"}" + err = zoneICHandler.AddRemoteZoneNode(&testNode4) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + testNodesRouteInfo = map[string]map[string]string{ + "node4": {"node-subnets": "10.244.5.0/24", "ts-ip": "168.254.0.5", "host-route": "100.64.0.5/32"}, + } + err = checkInterconnectResources("global", types.DefaultNetworkName, libovsdbOvnNBClient, testNodesRouteInfo, &testNode4) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + return nil + } + + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR, + "-init-cluster-manager", + "-zone-join-switch-subnets=" + joinSubnetCIDR, + "-enable-interconnect", + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + }) +}) diff --git a/go-controller/pkg/ovn/zone_interconnect/zone_interconnect_suit_test.go b/go-controller/pkg/ovn/zone_interconnect/zone_interconnect_suit_test.go new file mode 100644 index 0000000000..6115c68c0f --- /dev/null +++ b/go-controller/pkg/ovn/zone_interconnect/zone_interconnect_suit_test.go @@ -0,0 +1,13 @@ +package zoneinterconnect + +import ( + "testing" + + . "github.com/onsi/ginkgo" + . 
"github.com/onsi/gomega" +) + +func TestZoneInterconnect(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Zone interconnect Operations Suite") +} diff --git a/go-controller/pkg/types/const.go b/go-controller/pkg/types/const.go index 562babc253..6c8a3f1e8e 100644 --- a/go-controller/pkg/types/const.go +++ b/go-controller/pkg/types/const.go @@ -55,6 +55,10 @@ const ( // Local Bridge used for localnet topology network access LocalNetBridgeName = "br-localnet" + TransitSwitch = "transit_switch" + TransitSwitchToRouterPrefix = "tstor-" + RouterToTransitSwitchPrefix = "rtots-" + // ACL Priorities // Default routed multicast allow acl rule priority diff --git a/test/e2e/multicast.go b/test/e2e/multicast.go index bf3ff560a8..c02f4a3e70 100644 --- a/test/e2e/multicast.go +++ b/test/e2e/multicast.go @@ -82,8 +82,8 @@ var _ = ginkgo.Describe("Multicast", func() { // Start the multicast source (iperf client is the sender in multicast) ginkgo.By("creating a pod as a multicast source in node " + clientNodeInfo.name) - // multicast group (-c 224.3.3.3), UDP (-u), TTL (-T 2), during (-t 3000) seconds, report every (-i 5) seconds - iperf := fmt.Sprintf("iperf -c %s -u -T 2 -t 3000 -i 5", mcastGroup) + // multicast group (-c 224.3.3.3), UDP (-u), TTL (-T 3), during (-t 3000) seconds, report every (-i 5) seconds + iperf := fmt.Sprintf("iperf -c %s -u -T 3 -t 3000 -i 5", mcastGroup) if IsIPv6Cluster(cs) { iperf = iperf + " -V" } From 33be50ff1267f349b6fdb0e7318481faafa1b66a Mon Sep 17 00:00:00 2001 From: Dumitru Ceara Date: Thu, 7 Jul 2022 16:52:07 -0400 Subject: [PATCH 07/90] network-controller-manager: Manage network policies for each zone. Also track remote zone pod IPs in their namespaces. We need them in order to be able to select remote zone pods by namespace in a network policy. Signed-off-by: Dumitru Ceara Co-authored-by: Numan Siddique Signed-off-by: Numan Siddique --- .../pkg/ovn/base_network_controller_pods.go | 34 +++++-- .../pkg/ovn/base_network_controller_policy.go | 4 + go-controller/pkg/ovn/namespace.go | 35 +++++++ go-controller/pkg/ovn/ovn.go | 93 ++++++++++++++++++- 4 files changed, 152 insertions(+), 14 deletions(-) diff --git a/go-controller/pkg/ovn/base_network_controller_pods.go b/go-controller/pkg/ovn/base_network_controller_pods.go index 0880e2ff0d..9061688874 100644 --- a/go-controller/pkg/ovn/base_network_controller_pods.go +++ b/go-controller/pkg/ovn/base_network_controller_pods.go @@ -220,20 +220,15 @@ func (bnc *BaseNetworkController) deletePodLogicalPort(pod *kapi.Pod, portInfo * return true, nil } - var needleIPs []net.IP - for _, podIPNet := range podIfAddrs { - needleIPs = append(needleIPs, podIPNet.IP) - } - - collidingPod, err := bnc.findPodWithIPAddresses(needleIPs) + canRelease, err := bnc.canReleasePodIPs(podIfAddrs) if err != nil { return false, fmt.Errorf("unable to determine if completed pod IP is in use by another pod. "+ "Will not release pod %s/%s IP: %#v from allocator. %v", pod.Namespace, pod.Name, podIfAddrs, err) } - if collidingPod != nil { - klog.Infof("Will not release IP address: %s for %s. Detected another pod"+ - " using this IP: %s/%s", util.JoinIPNetIPs(podIfAddrs, " "), podDesc, collidingPod.Namespace, collidingPod.Name) + if !canRelease { + klog.Infof("Will not release IP address: %s for %s. 
Detected another pod using it."+ + " using this IP: %s/%s", util.JoinIPNetIPs(podIfAddrs, " "), podDesc) return false, nil } @@ -317,6 +312,27 @@ func (bnc *BaseNetworkController) findPodWithIPAddresses(needleIPs []net.IP) (*k return nil, nil } +// canReleasePodIPs checks if the podIPs can be released or not. +func (bnc *BaseNetworkController) canReleasePodIPs(podIfAddrs []*net.IPNet) (bool, error) { + var needleIPs []net.IP + for _, podIPNet := range podIfAddrs { + needleIPs = append(needleIPs, podIPNet.IP) + } + collidingPod, err := bnc.findPodWithIPAddresses(needleIPs) + if err != nil { + return false, fmt.Errorf("unable to determine if pod IPs: %#v are in use by another pod :%w", podIfAddrs, err) + + } + + if collidingPod != nil { + klog.Infof("Should not release IP address: %s. Detected another pod"+ + " using this IP: %s/%s", util.JoinIPNetIPs(podIfAddrs, " "), collidingPod.Namespace, collidingPod.Name) + return false, nil + } + + return true, nil +} + func (bnc *BaseNetworkController) releasePodIPs(pInfo *lpInfo) error { if err := bnc.lsManager.ReleaseIPs(pInfo.logicalSwitch, pInfo.ips); err != nil { if !errors.Is(err, logicalswitchmanager.SwitchNotFound) { diff --git a/go-controller/pkg/ovn/base_network_controller_policy.go b/go-controller/pkg/ovn/base_network_controller_policy.go index dc17f3be5a..faad1641e8 100644 --- a/go-controller/pkg/ovn/base_network_controller_policy.go +++ b/go-controller/pkg/ovn/base_network_controller_policy.go @@ -514,6 +514,10 @@ func (bnc *BaseNetworkController) getNewLocalPolicyPorts(np *networkPolicy, continue } + if !bnc.isPodScheduledinLocalZone(pod) { + continue + } + // Skip pods that will never be present in logicalPortCache, // e.g. hostNetwork pods, overlay node pods, or completed pods if !bnc.podExpectedInLogicalCache(pod) { diff --git a/go-controller/pkg/ovn/namespace.go b/go-controller/pkg/ovn/namespace.go index 92c4649828..c3550ea1e2 100644 --- a/go-controller/pkg/ovn/namespace.go +++ b/go-controller/pkg/ovn/namespace.go @@ -7,6 +7,7 @@ import ( "github.com/ovn-org/libovsdb/ovsdb" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdbops" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -74,6 +75,33 @@ func (oc *DefaultNetworkController) addPodToNamespace(ns string, ips []*net.IPNe return oc.getRoutingExternalGWs(nsInfo), oc.getRoutingPodGWs(nsInfo), ops, nil } +func (oc *DefaultNetworkController) addRemotePodToNamespace(ns string, ips []*net.IPNet) error { + _, _, ops, err := oc.addPodToNamespace(ns, ips) + + if err == nil { + _, err = libovsdbops.TransactAndCheck(oc.nbClient, ops) + if err != nil { + return fmt.Errorf("could not add pod IPs to the namespace address set - %+v", err) + } + } + return err +} + +func (oc *DefaultNetworkController) deleteRemotePodFromNamespace(ns string, ips []*net.IPNet) error { + nsInfo, nsUnlock := oc.getNamespaceLocked(ns, true) + if nsInfo == nil { + return nil + } + defer nsUnlock() + + if nsInfo.addressSet != nil { + if err := nsInfo.addressSet.DeleteIPs(createIPAddressSlice(ips)); err != nil { + return err + } + } + return nil +} + func createIPAddressSlice(ips []*net.IPNet) []net.IP { ipAddrs := make([]net.IP, 0) for _, ip := range ips { @@ -155,6 +183,10 @@ func (oc *DefaultNetworkController) updateNamespace(old, newer *kapi.Namespace) errors = append(errors, fmt.Errorf("failed to get all the pods (%v)", err)) } for _, pod := range existingPods { + if 
!oc.isPodScheduledinLocalZone(pod) { + continue + } + logicalPort := util.GetLogicalPortName(pod.Namespace, pod.Name) if util.PodWantsHostNetwork(pod) { continue @@ -199,6 +231,9 @@ func (oc *DefaultNetworkController) updateNamespace(old, newer *kapi.Namespace) errors = append(errors, fmt.Errorf("failed to get all the pods (%v)", err)) } for _, pod := range existingPods { + if !oc.isPodScheduledinLocalZone(pod) { + continue + } podAnnotation, err := util.UnmarshalPodAnnotation(pod.Annotations, types.DefaultNetworkName) if err != nil { errors = append(errors, err) diff --git a/go-controller/pkg/ovn/ovn.go b/go-controller/pkg/ovn/ovn.go index d8f31db93f..a280f06728 100644 --- a/go-controller/pkg/ovn/ovn.go +++ b/go-controller/pkg/ovn/ovn.go @@ -17,6 +17,7 @@ import ( egresssvc "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/egress_services" svccontroller "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/services" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/healthcheck" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -131,8 +132,7 @@ func (oc *DefaultNetworkController) ensurePod(oldPod, pod *kapi.Pod, addPort boo return oc.ensureLocalZonePod(oldPod, pod, addPort) } - // TODO (numans): For remote zone pods add the pod ips to the namespace address set - return nil + return oc.ensureRemoteZonePod(oldPod, pod, addPort) } // ensureLocalZonePod tries to set up a local zone pod. It returns nil on success and error on failure; failure @@ -175,6 +175,45 @@ func (oc *DefaultNetworkController) ensureLocalZonePod(oldPod, pod *kapi.Pod, ad return nil } +// ensureRemoteZonePod tries to set up remote zone pod bits required to interconnect it. +// - Adds the remote pod ips to the pod namespace address set for network policy and egress gw +// +// It returns nil on success and error on failure; failur indicates the pod set up should be retried later. +func (oc *DefaultNetworkController) ensureRemoteZonePod(oldPod, pod *kapi.Pod, addPort bool) error { + if len(pod.Status.PodIPs) < 1 { + return nil + } + podIfAddrs, err := util.GetPodCIDRsWithFullMask(pod) + if err != nil { + return fmt.Errorf("failed to get pod ips for the pod %s/%s : %w", pod.Namespace, pod.Name, err) + } + + if (addPort || (oldPod != nil && len(pod.Status.PodIPs) != len(oldPod.Status.PodIPs))) && !util.PodWantsHostNetwork(pod) { + if err := oc.addRemotePodToNamespace(pod.Namespace, podIfAddrs); err != nil { + return fmt.Errorf("failed to add remote pod %s/%s to namespace: %w", pod.Namespace, pod.Name, err) + } + } + + //FIXME: Update comments & reduce code duplication. + // check if this remote pod is serving as an external GW. + if oldPod != nil && (exGatewayAnnotationsChanged(oldPod, pod) || networkStatusAnnotationsChanged(oldPod, pod)) { + // Delete the routes in the namespace associated with this remote oldPod if its acting as an external GW + if err := oc.deletePodExternalGW(oldPod); err != nil { + return fmt.Errorf("deletePodExternalGW failed for remote pod %s/%s: %w", oldPod.Namespace, oldPod.Name, err) + } + } + + // either pod is host-networked or its an update for a normal pod (addPort=false case) + if oldPod == nil || exGatewayAnnotationsChanged(oldPod, pod) || networkStatusAnnotationsChanged(oldPod, pod) { + // check if this remote pod is serving as an external GW. 
If so add the routes in the namespace
+		// associated with this remote pod
+		if err := oc.addPodExternalGW(pod); err != nil {
+			return fmt.Errorf("addPodExternalGW failed for remote pod %s/%s: %v", pod.Namespace, pod.Name, err)
+		}
+	}
+	return nil
+}
+
 // removePod tried to tear down a pod. It returns nil on success and error on failure;
 // failure indicates the pod tear down should be retried later.
 func (oc *DefaultNetworkController) removePod(pod *kapi.Pod, portInfo *lpInfo) error {
@@ -182,9 +221,7 @@ func (oc *DefaultNetworkController) removePod(pod *kapi.Pod, portInfo *lpInfo) e
 		return oc.removeLocalZonePod(pod, portInfo)
 	}

-	// TODO (numans) When we add the remote pod ips to the namespace address set, remove them
-	// when the remote pod is deleted.
-	return nil
+	return oc.removeRemoteZonePod(pod)
 }

 // removeLocalZonePod tries to tear down a local zone pod. It returns nil on success and error on failure;
@@ -213,6 +250,52 @@ func (oc *DefaultNetworkController) removeLocalZonePod(pod *kapi.Pod, portInfo *
 	return nil
 }

+// removeRemoteZonePod tries to tear down the remote zone pod bits. It returns nil on success and error on failure;
+// failure indicates the pod tear down should be retried later.
+// It removes the remote pod ips from the namespace address set and, if it's an external gw pod, removes
+// its routes.
+func (oc *DefaultNetworkController) removeRemoteZonePod(pod *kapi.Pod) error {
+	podDesc := fmt.Sprintf("pod %s/%s/%s", types.DefaultNetworkName, pod.Namespace, pod.Name)
+	podIfAddrs, err := util.GetPodCIDRsWithFullMask(pod)
+	if err != nil {
+		return fmt.Errorf("failed to get pod ips for the pod %s/%s : %w", pod.Namespace, pod.Name, err)
+	}
+
+	// Remove the pod ips from the namespace address set. Before that, check if it's a completed pod and
+	// make sure that the ips are not colliding with another pod.
+	shouldRelease := true
+	if util.PodCompleted(pod) {
+		shouldRelease, err = oc.canReleasePodIPs(podIfAddrs)
+		if err != nil {
+			klog.Errorf("Unable to determine if completed remote pod IP is in use by another pod. "+
+				"Will not release pod %s/%s IP: %#v from namespace address set. %v", pod.Namespace, pod.Name, podIfAddrs, err)
+			shouldRelease = false
+		}
+
+		if !shouldRelease {
+			klog.Infof("Cannot release IP address: %s for %s/%s from namespace address set. "+
+				"Detected another pod using this IP.", util.JoinIPNetIPs(podIfAddrs, " "), pod.Namespace, pod.Name)
+		}
+	}
+
+	if shouldRelease {
+		if err := oc.deleteRemotePodFromNamespace(pod.Namespace, podIfAddrs); err != nil {
+			return fmt.Errorf("failed to delete remote pod %s's IP from namespace: %w", podDesc, err)
+		}
+	}
+
+	if util.PodWantsHostNetwork(pod) {
+		// Delete the routes in the namespace associated with this remote pod if it was acting as an external GW
+		if err := oc.deletePodExternalGW(pod); err != nil {
+			return fmt.Errorf("unable to delete external gateway routes for remote pod %s: %w",
+				getPodNamespacedName(pod), err)
+		}
+		return nil
+	}
+
+	return nil
+}
+
 // WatchEgressFirewall starts the watching of egressfirewall resource and calls
 // back the appropriate handler logic
 func (oc *DefaultNetworkController) WatchEgressFirewall() error {

From d14ba68f46700175e24c4d066d9b7f6de9d66b08 Mon Sep 17 00:00:00 2001
From: Numan Siddique
Date: Thu, 7 Jul 2022 20:32:30 -0400
Subject: [PATCH 08/90] network-controller-manager: Add zone support for services.

Service controller will consider only the zone nodes when building the
cluster LBs and per node LBs.
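In practice this means the node tracker only hands zone-local nodes to the
service sync logic. A minimal, self-contained sketch of that filtering idea
(simplified types and names for illustration only, not the exact code added
by this patch):

    package main

    import "fmt"

    // nodeInfo is a trimmed-down stand-in for the tracker's per-node record.
    type nodeInfo struct {
        name string
        zone string
    }

    // zoneNodes returns only the nodes that belong to the given zone, which is
    // the set a per-zone service controller would base its load balancers on.
    func zoneNodes(all []nodeInfo, zone string) []nodeInfo {
        out := make([]nodeInfo, 0, len(all))
        for _, n := range all {
            if n.zone == zone {
                out = append(out, n)
            }
        }
        return out
    }

    func main() {
        nodes := []nodeInfo{{"node1", "global"}, {"node2", "global"}, {"node3", "foo"}}
        // Only node1 and node2 are considered by the "global" zone's controller.
        fmt.Println(zoneNodes(nodes, "global"))
    }
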
Note that when a service is created, eazh zone network controller manager will create an OVN load balancer for it. Signed-off-by: Numan Siddique --- .../ovn/controller/services/node_tracker.go | 30 +++++++--- .../services/services_controller.go | 6 +- .../services/services_controller_test.go | 58 ++++++++++++++++++- go-controller/pkg/util/node_annotations.go | 5 ++ 4 files changed, 89 insertions(+), 10 deletions(-) diff --git a/go-controller/pkg/ovn/controller/services/node_tracker.go b/go-controller/pkg/ovn/controller/services/node_tracker.go index 4fbb62966a..7d41e720c9 100644 --- a/go-controller/pkg/ovn/controller/services/node_tracker.go +++ b/go-controller/pkg/ovn/controller/services/node_tracker.go @@ -30,6 +30,9 @@ type nodeTracker struct { // resyncFn is the function to call so that all service are resynced resyncFn func(nodes []nodeInfo) + + // zone in which this nodeTracker is tracking + zone string } type nodeInfo struct { @@ -47,6 +50,9 @@ type nodeInfo struct { switchName string // The chassisID of the node (ovs.external-ids:system-id) chassisID string + + // The node's zone + zone string } func (ni *nodeInfo) hostAddressesStr() []string { @@ -86,9 +92,10 @@ func (ni *nodeInfo) nodeSubnets() []net.IPNet { return out } -func newNodeTracker(nodeInformer coreinformers.NodeInformer) (*nodeTracker, error) { +func newNodeTracker(nodeInformer coreinformers.NodeInformer, zone string) (*nodeTracker, error) { nt := &nodeTracker{ nodes: map[string]nodeInfo{}, + zone: zone, } _, err := nodeInformer.Informer().AddEventHandler(factory.WithUpdateHandlingForObjReplace(cache.ResourceEventHandlerFuncs{ @@ -118,11 +125,13 @@ func newNodeTracker(nodeInformer coreinformers.NodeInformer) (*nodeTracker, erro // - L3Gateway annotation's ip addresses have changed // - the name of the node (very rare) has changed // - the `host-addresses` annotation changed + // - node changes its zone // . No need to trigger update for any other field change. if util.NodeSubnetAnnotationChanged(oldObj, newObj) || util.NodeL3GatewayAnnotationChanged(oldObj, newObj) || oldObj.Name != newObj.Name || - util.NodeHostAddressesAnnotationChanged(oldObj, newObj) { + util.NodeHostAddressesAnnotationChanged(oldObj, newObj) || + util.NodeZoneAnnotationChanged(oldObj, newObj) { nt.updateNode(newObj) } }, @@ -152,7 +161,7 @@ func newNodeTracker(nodeInformer coreinformers.NodeInformer) (*nodeTracker, erro // updateNodeInfo updates the node info cache, and syncs all services // if it changed. 
-func (nt *nodeTracker) updateNodeInfo(nodeName, switchName, routerName, chassisID string, l3gatewayAddresses, hostAddresses []net.IP, podSubnets []*net.IPNet) { +func (nt *nodeTracker) updateNodeInfo(nodeName, switchName, routerName, chassisID string, l3gatewayAddresses, hostAddresses []net.IP, podSubnets []*net.IPNet, zone string) { ni := nodeInfo{ name: nodeName, l3gatewayAddresses: l3gatewayAddresses, @@ -161,6 +170,7 @@ func (nt *nodeTracker) updateNodeInfo(nodeName, switchName, routerName, chassisI gatewayRouterName: routerName, switchName: switchName, chassisID: chassisID, + zone: zone, } for i := range podSubnets { ni.podSubnets = append(ni.podSubnets, *podSubnets[i]) // de-pointer @@ -179,7 +189,7 @@ func (nt *nodeTracker) updateNodeInfo(nodeName, switchName, routerName, chassisI nt.nodes[nodeName] = ni // Resync all services - nt.resyncFn(nt.allNodes()) + nt.resyncFn(nt.getZoneNodes()) } // removeNodeWithServiceReSync removes a node from the LB -> node mapper @@ -187,7 +197,7 @@ func (nt *nodeTracker) updateNodeInfo(nodeName, switchName, routerName, chassisI func (nt *nodeTracker) removeNodeWithServiceReSync(nodeName string) { nt.removeNode(nodeName) nt.Lock() - nt.resyncFn(nt.allNodes()) + nt.resyncFn(nt.getZoneNodes()) nt.Unlock() } @@ -254,14 +264,18 @@ func (nt *nodeTracker) updateNode(node *v1.Node) { l3gatewayAddresses, hostAddressesIPs, hsn, + util.GetNodeZone(node), ) } -// allNodes returns a list of all nodes (and their relevant information) -func (nt *nodeTracker) allNodes() []nodeInfo { +// getZoneNodes returns a list of all nodes (and their relevant information) +// which belong to the nodeTracker 'zone' +func (nt *nodeTracker) getZoneNodes() []nodeInfo { out := make([]nodeInfo, 0, len(nt.nodes)) for _, node := range nt.nodes { - out = append(out, node) + if node.zone == nt.zone { + out = append(out, node) + } } // Sort the returned list of nodes diff --git a/go-controller/pkg/ovn/controller/services/services_controller.go b/go-controller/pkg/ovn/controller/services/services_controller.go index 5777dfbbde..e1bb090955 100644 --- a/go-controller/pkg/ovn/controller/services/services_controller.go +++ b/go-controller/pkg/ovn/controller/services/services_controller.go @@ -99,9 +99,13 @@ func NewController(client clientset.Interface, // repair controller c.repair = newRepair(serviceInformer.Lister(), nbClient) + zone, err := util.GetNBZone(nbClient) + if err != nil { + return nil, fmt.Errorf("unable to get the NB Zone : err - %w", err) + } // load balancers need to be applied to nodes, so // we need to watch Node objects for changes. 
- c.nodeTracker, err = newNodeTracker(nodeInformer) + c.nodeTracker, err = newNodeTracker(nodeInformer, zone) if err != nil { return nil, err } diff --git a/go-controller/pkg/ovn/controller/services/services_controller_test.go b/go-controller/pkg/ovn/controller/services/services_controller_test.go index 4e772e4837..485aa40a71 100644 --- a/go-controller/pkg/ovn/controller/services/services_controller_test.go +++ b/go-controller/pkg/ovn/controller/services/services_controller_test.go @@ -1,6 +1,7 @@ package services import ( + "context" "fmt" "net" "strings" @@ -9,10 +10,12 @@ import ( "github.com/onsi/ginkgo" "github.com/onsi/gomega" "github.com/onsi/gomega/format" + libovsdbclient "github.com/ovn-org/libovsdb/client" globalconfig "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" libovsdbtest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/libovsdb" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" v1 "k8s.io/api/core/v1" discovery "k8s.io/api/discovery/v1" @@ -50,6 +53,17 @@ func newControllerWithDBSetup(dbSetup libovsdbtest.TestSetup) (*serviceControlle recorder := record.NewFakeRecorder(10) + nbZoneFailed := false + // Try to get the NBZone. If there is an error, create NB_Global record. + // Otherwise NewController() will return error since it + // calls util.GetNBZone(). + _, err = util.GetNBZone(nbClient) + if err != nil { + nbZoneFailed = true + err = createTestNBGlobal(nbClient, "global") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + } + controller, err := NewController(client, nbClient, informerFactory.Core().V1().Services(), @@ -59,11 +73,19 @@ func newControllerWithDBSetup(dbSetup libovsdbtest.TestSetup) (*serviceControlle ) gomega.Expect(err).ToNot(gomega.HaveOccurred()) + if nbZoneFailed { + // Delete the NBGlobal row as this function created it. Otherwise many tests would fail while + // checking the expectedData in the NBDB. + err = deleteTestNBGlobal(nbClient, "global") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + } + controller.servicesSynced = alwaysReady controller.endpointSlicesSynced = alwaysReady controller.initTopLevelCache() controller.useLBGroups = true controller.useTemplates = true + return &serviceController{ controller, informerFactory.Core().V1().Services().Informer().GetStore(), @@ -470,7 +492,7 @@ func TestSyncServices(t *testing.T) { controller.serviceStore.Add(tt.service) controller.nodeTracker.nodes = defaultNodes - controller.RequestFullSync(controller.nodeTracker.allNodes()) + controller.RequestFullSync(controller.nodeTracker.getZoneNodes()) err = controller.syncService(ns + "/" + serviceName) if err != nil { @@ -685,6 +707,7 @@ func nodeConfig(nodeName string, nodeIP string) *nodeInfo { gatewayRouterName: nodeGWRouterName(nodeName), switchName: nodeSwitchName(nodeName), chassisID: nodeName, + zone: types.OvnDefaultZone, } } @@ -695,3 +718,36 @@ func temporarilyEnableGomegaMaxLengthFormat() { func restoreGomegaMaxLengthFormat(originalLength int) { format.MaxLength = originalLength } + +func createTestNBGlobal(nbClient libovsdbclient.Client, zone string) error { + nbGlobal := &nbdb.NBGlobal{Name: zone} + ops, err := nbClient.Create(nbGlobal) + if err != nil { + return err + } + + _, err = nbClient.Transact(context.Background(), ops...) 
+ if err != nil { + return err + } + + return nil +} + +func deleteTestNBGlobal(nbClient libovsdbclient.Client, zone string) error { + p := func(nbGlobal *nbdb.NBGlobal) bool { + return true + } + + ops, err := nbClient.WhereCache(p).Delete() + if err != nil { + return err + } + + _, err = nbClient.Transact(context.Background(), ops...) + if err != nil { + return err + } + + return nil +} diff --git a/go-controller/pkg/util/node_annotations.go b/go-controller/pkg/util/node_annotations.go index 109fdffb5c..c5c28cffdd 100644 --- a/go-controller/pkg/util/node_annotations.go +++ b/go-controller/pkg/util/node_annotations.go @@ -693,3 +693,8 @@ func GetNodeZone(node *kapi.Node) string { return zoneName } + +// NodeZoneAnnotationChanged returns true if the ovnNodeZoneName in the corev1.Nodes doesn't match +func NodeZoneAnnotationChanged(oldNode, newNode *corev1.Node) bool { + return oldNode.Annotations[ovnNodeZoneName] != newNode.Annotations[ovnNodeZoneName] +} From c5ed4c286fd9bf4dccf279b470f1ef3697b8b41e Mon Sep 17 00:00:00 2001 From: Numan Siddique Date: Wed, 13 Jul 2022 18:59:12 -0500 Subject: [PATCH 09/90] Add a new option to specify the zone name. For the ovnkube-network-controller-manager, the passed zone name is validated against the zone name stored in the Northbound database. ovnkube-network-controller-manager will exit if there is a mismatch between the two. This ensures that ovnkube-network-controller-manager is connected to the proper Northboubd database. For the ovnkube-node, the passed zone name is validated against the zone name stored in the Southbound databae. ovnkube-node will exit if there is a mismatch between the two. This ensures that ovnkube-node is connected to the proper Southbound database. This patch also changes the LE lock name. If ovnkube is started in "master" mode (both cluster manager and network controller manager) or in "network controller manager" mode, then the LE lock name will be - "ovn-kubernetes-master-". Signed-off-by: Numan Siddique --- go-controller/cmd/ovnkube/ovnkube.go | 2 +- go-controller/pkg/config/config.go | 13 +++++++++++ go-controller/pkg/config/config_test.go | 5 ++++ .../network_controller_manager.go | 23 ++++++++++++++++++- .../node/default_node_network_controller.go | 23 +++++++++++++++---- test/e2e/e2e.go | 4 ++-- 6 files changed, 62 insertions(+), 8 deletions(-) diff --git a/go-controller/cmd/ovnkube/ovnkube.go b/go-controller/cmd/ovnkube/ovnkube.go index ee1387c8ae..d030602e3f 100644 --- a/go-controller/cmd/ovnkube/ovnkube.go +++ b/go-controller/cmd/ovnkube/ovnkube.go @@ -316,7 +316,7 @@ func startOvnKube(ctx *cli.Context, cancel context.CancelFunc) error { case runMode.networkControllerManager: metrics.RegisterMasterBase() haConfig = &config.MasterHA - name = "ovn-kubernetes-master" + name = "ovn-kubernetes-master-" + config.Default.Zone case runMode.clusterManager: metrics.RegisterClusterManagerBase() haConfig = &config.ClusterMgrHA diff --git a/go-controller/pkg/config/config.go b/go-controller/pkg/config/config.go index b95453ec45..d64e4daeba 100644 --- a/go-controller/pkg/config/config.go +++ b/go-controller/pkg/config/config.go @@ -68,6 +68,7 @@ var ( MonitorAll: true, LFlowCacheEnable: true, RawClusterSubnets: "10.128.0.0/14/23", + Zone: types.OvnDefaultZone, } // Logging holds logging-related parsed config file parameters and command-line overrides @@ -238,6 +239,9 @@ type DefaultConfig struct { // of small UDP packets by allowing them to be aggregated before passing through // the kernel network stack. 
This requires a new-enough kernel (5.15 or RHEL 8.5). EnableUDPAggregation bool `gcfg:"enable-udp-aggregation"` + + // Zone name to which ovnkube-node/ovnkube-network-controller-manager belongs to + Zone string `gcfg:"zone"` } // LoggingConfig holds logging-related parsed config file parameters and command-line overrides @@ -832,6 +836,12 @@ var CommonFlags = []cli.Flag{ Destination: &cliConfig.Logging.ACLLoggingRateLimit, Value: 20, }, + &cli.StringFlag{ + Name: "zone", + Usage: "zone name to which ovnkube-node/ovnkube-network-controller-manager belongs to", + Value: Default.Zone, + Destination: &cliConfig.Default.Zone, + }, } // MonitoringFlags capture monitoring-related options @@ -1887,6 +1897,9 @@ func buildDefaultConfig(cli, file *config) error { return fmt.Errorf("cluster subnet is required") } + if Default.Zone == "" { + Default.Zone = types.OvnDefaultZone + } return nil } diff --git a/go-controller/pkg/config/config_test.go b/go-controller/pkg/config/config_test.go index cb3b33384c..e399cef12c 100644 --- a/go-controller/pkg/config/config_test.go +++ b/go-controller/pkg/config/config_test.go @@ -139,6 +139,7 @@ conntrack-zone=64321 cluster-subnets=10.132.0.0/14/23 lflow-cache-limit=1000 lflow-cache-limit-kb=100000 +zone=global [kubernetes] kubeconfig=/path/to/kubeconfig @@ -299,6 +300,7 @@ var _ = Describe("Config Operations", func() { gomega.Expect(Default.ClusterSubnets).To(gomega.Equal([]CIDRNetworkEntry{ {ovntest.MustParseIPNet("10.128.0.0/14"), 23}, })) + gomega.Expect(Default.Zone).To(gomega.Equal("global")) gomega.Expect(IPv4Mode).To(gomega.Equal(true)) gomega.Expect(IPv6Mode).To(gomega.Equal(false)) gomega.Expect(HybridOverlay.Enabled).To(gomega.Equal(false)) @@ -589,6 +591,7 @@ var _ = Describe("Config Operations", func() { gomega.Expect(Default.ClusterSubnets).To(gomega.Equal([]CIDRNetworkEntry{ {ovntest.MustParseIPNet("10.132.0.0/14"), 23}, })) + gomega.Expect(Default.Zone).To(gomega.Equal("foo")) gomega.Expect(Metrics.BindAddress).To(gomega.Equal("1.1.1.1:8080")) gomega.Expect(Metrics.OVNMetricsBindAddress).To(gomega.Equal("1.1.1.2:8081")) @@ -679,6 +682,7 @@ var _ = Describe("Config Operations", func() { gomega.Expect(Default.ClusterSubnets).To(gomega.Equal([]CIDRNetworkEntry{ {ovntest.MustParseIPNet("10.130.0.0/15"), 24}, })) + gomega.Expect(Default.Zone).To(gomega.Equal("bar")) gomega.Expect(Metrics.BindAddress).To(gomega.Equal("2.2.2.2:8080")) gomega.Expect(Metrics.OVNMetricsBindAddress).To(gomega.Equal("2.2.2.3:8081")) @@ -779,6 +783,7 @@ var _ = Describe("Config Operations", func() { "-enable-multi-networkpolicy=true", "-enable-interconnect=true", "-healthz-bind-address=0.0.0.0:4321", + "-zone=bar", } err = app.Run(cliArgs) gomega.Expect(err).NotTo(gomega.HaveOccurred()) diff --git a/go-controller/pkg/network-controller-manager/network_controller_manager.go b/go-controller/pkg/network-controller-manager/network_controller_manager.go index a851460fd6..ff226e00f1 100644 --- a/go-controller/pkg/network-controller-manager/network_controller_manager.go +++ b/go-controller/pkg/network-controller-manager/network_controller_manager.go @@ -20,6 +20,7 @@ import ( ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + "k8s.io/apimachinery/pkg/util/wait" clientset "k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/record" "k8s.io/klog/v2" @@ -327,9 +328,29 @@ func (cm *networkControllerManager) initDefaultNetworkController() error { // Start the network controller manager func (cm 
func (cm *networkControllerManager) Start(ctx context.Context) error {
 	klog.Info("Starting the network controller manager")
+
+	// Make sure that the NCM zone matches the Northbound db zone.
+	// Wait for 300s before giving up
+	var zone string
+	err := wait.PollImmediate(500*time.Millisecond, 300*time.Second, func() (bool, error) {
+		var err error
+		zone, err = util.GetNBZone(cm.nbClient)
+		if err != nil {
+			return false, fmt.Errorf("error getting the zone name from the OVN Northbound db : %w", err)
+		}
+
+		if config.Default.Zone != zone {
+			return false, fmt.Errorf("network controller manager zone %s doesn't match the Northbound db zone %s", config.Default.Zone, zone)
+		}
+		return true, nil
+	})
+
+	if err != nil {
+		return fmt.Errorf("failed to start default network controller - OVN Northbound db zone %s doesn't match the configured zone %s : err - %w", zone, config.Default.Zone, err)
+	}
+
 	cm.configureMetrics(cm.stopChan)
-	err := cm.configureSCTPSupport()
+	err = cm.configureSCTPSupport()
 	if err != nil {
 		return err
 	}
diff --git a/go-controller/pkg/node/default_node_network_controller.go b/go-controller/pkg/node/default_node_network_controller.go
index 81e684c15c..bcf59342af 100644
--- a/go-controller/pkg/node/default_node_network_controller.go
+++ b/go-controller/pkg/node/default_node_network_controller.go
@@ -539,6 +539,25 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error {
 		return fmt.Errorf("failed to parse kubernetes node IP address. %v", err)
 	}
 
+	// Make sure that the node zone matches the Southbound db zone.
+	// Wait for 300s before giving up
+	var sbZone string
+	err = wait.PollImmediate(500*time.Millisecond, 300*time.Second, func() (bool, error) {
+		sbZone, err = getOVNSBZone()
+		if err != nil {
+			return false, fmt.Errorf("failed to get the zone name from the OVN Southbound db server, err : %w", err)
+		}
+
+		if config.Default.Zone != sbZone {
+			return false, fmt.Errorf("node %s zone %s doesn't match the Southbound db zone %s", nc.name, config.Default.Zone, sbZone)
+		}
+		return true, nil
+	})
+
+	if err != nil {
+		return fmt.Errorf("timed out waiting for the node zone %s to match the OVN Southbound db zone, err : %w", config.Default.Zone, err)
+	}
+
 	if config.OvnKubeNode.Mode != types.NodeModeDPUHost {
 		for _, auth := range []config.OvnAuthConfig{config.OvnNorth, config.OvnSouth} {
 			if err := auth.SetDBAuth(); err != nil {
@@ -625,10 +644,6 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error {
 		}
 	}
 
-	sbZone, err := getOVNSBZone()
-	if err != nil {
-		return fmt.Errorf("failed to get the zone name from the OVN Southbound db server, err : %w", err)
-	}
 	if err := util.SetNodeZone(nodeAnnotator, sbZone); err != nil {
 		return fmt.Errorf("failed to set node zone annotation for node %s: %w", nc.name, err)
 	}
diff --git a/test/e2e/e2e.go b/test/e2e/e2e.go
index 37d112df24..684a0ef492 100644
--- a/test/e2e/e2e.go
+++ b/test/e2e/e2e.go
@@ -487,10 +487,10 @@ func restartOVNKubeNodePod(clientset kubernetes.Interface, namespace string, nod
 
 func findOvnKubeMasterNode() (string, error) {
 
-	ovnkubeMasterNode, err := framework.RunKubectl(ovnNs, "get", "leases", "ovn-kubernetes-master",
+	ovnkubeMasterNode, err := framework.RunKubectl(ovnNs, "get", "leases", "ovn-kubernetes-master-global",
 		"-o", "jsonpath='{.spec.holderIdentity}'")
 
-	framework.ExpectNoError(err, fmt.Sprintf("Unable to retrieve leases (ovn-kubernetes-master)"+
+	framework.ExpectNoError(err, fmt.Sprintf("Unable to retrieve leases (ovn-kubernetes-master-global)"+
 		"from %s %v", ovnNs, err))
	framework.Logf(fmt.Sprintf("master instance of ovnkube-master is running on node %s", ovnkubeMasterNode))

From fd60026074fcde42d0315994b09345475b6ccd6d Mon Sep 17 00:00:00 2001
From: Numan Siddique
Date: Mon, 6 Mar 2023 16:14:43 -0500
Subject: [PATCH 10/90] Add interconnect support for secondary layer3 networks.

A transit switch is created for each secondary layer3 network just like
it is created for the default network.

cluster manager generates a network id for each network and stores it
in the node annotation - "k8s.ovn.org/network-ids". This is used by
network controller manager to generate a unique tunnel key for the
transit switch of each secondary layer3 network it creates.

With this patch, E-W traffic within each secondary layer3 network works
when interconnect is enabled.

With this patch, multiple zones are supported both for the default
network and secondary layer3 networks (except for secondary layer2 and
localnet networks). Although a user can deploy multiple zones, it is
not yet recommended since this feature is not yet tested in the CI.
Upcoming patches will add the support in kind to deploy multiple zones
and test it in the CI.

Signed-off-by: Numan Siddique
---
 .../pkg/clustermanager/clustermanager.go      |   7 +-
 .../pkg/clustermanager/clustermanager_test.go |  17 ++
 .../network_cluster_controller.go             |  49 ++++--
 .../network_cluster_controller_test.go        |   6 +-
 .../secondary_network_cluster_manager.go      |  54 ++++++-
 .../secondary_network_unit_test.go            |   2 +-
 .../ovn/base_network_controller_namespace.go  |  40 +++++
 .../ovn/base_network_controller_secondary.go  |  63 +++++++-
 go-controller/pkg/ovn/egressip.go             |   4 +-
 go-controller/pkg/ovn/namespace.go            |  15 --
 go-controller/pkg/ovn/ovn.go                  |  33 +---
 .../secondary_layer3_network_controller.go    | 150 +++++++++++++++---
 .../ovn/zone_interconnect/zone_ic_handler.go  |  31 +++-
 .../zone_interconnect/zone_ic_handler_test.go |  19 +++
 go-controller/pkg/util/node_annotations.go    | 128 +++++++++++++++
 go-controller/pkg/util/pod_annotation.go      |   4 +-
 .../pkg/util/pod_annotation_unit_test.go      |   2 +-
 17 files changed, 521 insertions(+), 103 deletions(-)

diff --git a/go-controller/pkg/clustermanager/clustermanager.go b/go-controller/pkg/clustermanager/clustermanager.go
index 3e27b71485..2d3447ac20 100644
--- a/go-controller/pkg/clustermanager/clustermanager.go
+++ b/go-controller/pkg/clustermanager/clustermanager.go
@@ -16,6 +16,11 @@ import (
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util"
 )
 
+const (
+	// ID of the default network.
+	defaultNetworkID = 0
+)
+
 // ClusterManager structure is the object which manages the cluster nodes.
 // It creates a default network controller for the default network and a
 // secondary network cluster controller manager to manage the multi networks.
@@ -37,7 +42,7 @@ type ClusterManager struct {
 
 // NewClusterManager creates a new cluster manager to manage the cluster nodes.
func NewClusterManager(ovnClient *util.OVNClusterManagerClientset, wf *factory.WatchFactory, identity string, wg *sync.WaitGroup, recorder record.EventRecorder) (*ClusterManager, error) { - defaultNetClusterController := newNetworkClusterController(ovntypes.DefaultNetworkName, config.Default.ClusterSubnets, + defaultNetClusterController := newNetworkClusterController(ovntypes.DefaultNetworkName, defaultNetworkID, config.Default.ClusterSubnets, ovnClient, wf, config.HybridOverlay.Enabled, &util.DefaultNetInfo{}, &util.DefaultNetConfInfo{}) zoneClusterController, err := newZoneClusterController(ovnClient, wf) diff --git a/go-controller/pkg/clustermanager/clustermanager_test.go b/go-controller/pkg/clustermanager/clustermanager_test.go index 1addfe3f79..0145ff7288 100644 --- a/go-controller/pkg/clustermanager/clustermanager_test.go +++ b/go-controller/pkg/clustermanager/clustermanager_test.go @@ -125,6 +125,23 @@ var _ = ginkgo.Describe("Cluster Manager", func() { }, 2).Should(gomega.HaveLen(1)) } + // Check that the network id 0 is allocated for the default network + for _, n := range nodes { + gomega.Eventually(func() error { + updatedNode, err := fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), n.Name, metav1.GetOptions{}) + if err != nil { + return err + } + + networkId, err := util.ParseNetworkIDAnnotation(updatedNode, "default") + if err != nil { + return fmt.Errorf("expected node network id annotation for node %s to have been allocated", n.Name) + } + + gomega.Expect(networkId).To(gomega.Equal(0)) + return nil + }).ShouldNot(gomega.HaveOccurred()) + } return nil } diff --git a/go-controller/pkg/clustermanager/network_cluster_controller.go b/go-controller/pkg/clustermanager/network_cluster_controller.go index d2c1a37938..aafef6f234 100644 --- a/go-controller/pkg/clustermanager/network_cluster_controller.go +++ b/go-controller/pkg/clustermanager/network_cluster_controller.go @@ -23,9 +23,13 @@ import ( ) // networkClusterController is the cluster controller for the networks. -// It listens to the node events and allocates subnet from the -// cluster subnet pool. It also allocates subnets from the hybrid overlay subnet pool -// if hybrid overlay is enabled. +// An instance of this struct is expected to be created for each network. +// A network is identified by its name and its unique id. +// It listens to the node events and does the following. +// - allocates subnet from the cluster subnet pool. It also allocates subnets +// from the hybrid overlay subnet pool if hybrid overlay is enabled. +// It stores these allocated subnets in the node annotation +// - stores the network id in each node's annotation. 
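+//
+// For example (values are purely illustrative, following the annotation
+// formats exercised by the tests in this patch), a node synced for the
+// default network and a secondary layer3 network "blue" could end up with:
+//
+//	k8s.ovn.org/node-subnets: {"default":["10.244.2.0/24"],"blue":["10.244.3.0/24"]}
+//	k8s.ovn.org/network-ids:  {"default":"0","blue":"1"}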
type networkClusterController struct { kube kube.Interface watchFactory *factory.WatchFactory @@ -38,7 +42,11 @@ type networkClusterController struct { // retry framework for nodes retryNodes *objretry.RetryFramework - networkName string + // name of the network + networkName string + // unique id of the network + networkID int + clusterSubnetAllocator *subnetallocator.HostSubnetAllocator clusterSubnets []config.CIDRNetworkEntry @@ -49,7 +57,7 @@ type networkClusterController struct { util.NetConfInfo } -func newNetworkClusterController(networkName string, clusterSubnets []config.CIDRNetworkEntry, +func newNetworkClusterController(networkName string, networkID int, clusterSubnets []config.CIDRNetworkEntry, ovnClient *util.OVNClusterManagerClientset, wf *factory.WatchFactory, enableHybridOverlaySubnetAllocator bool, netInfo util.NetInfo, netConfInfo util.NetConfInfo) *networkClusterController { @@ -69,6 +77,7 @@ func newNetworkClusterController(networkName string, clusterSubnets []config.CID stopChan: make(chan struct{}), wg: wg, networkName: networkName, + networkID: networkID, clusterSubnetAllocator: subnetallocator.NewHostSubnetAllocator(), clusterSubnets: clusterSubnets, hybridOverlaySubnetAllocator: hybridOverlaySubnetAllocator, @@ -190,10 +199,13 @@ func (ncc *networkClusterController) handleAddUpdateNodeEvent(node *corev1.Node) return nil } - return ncc.syncNodeClusterSubnet(node) + return ncc.syncNodeNetworkAnnotations(node) } -func (ncc *networkClusterController) syncNodeClusterSubnet(node *corev1.Node) error { +// syncNodeNetworkAnnotations does 2 things +// - syncs the node's allocated subnets in the node subnet annotation +// - syncs the network id in the node network id annotation +func (ncc *networkClusterController) syncNodeNetworkAnnotations(node *corev1.Node) error { ncc.clusterSubnetAllocator.Lock() defer ncc.clusterSubnetAllocator.Unlock() @@ -203,6 +215,12 @@ func (ncc *networkClusterController) syncNodeClusterSubnet(node *corev1.Node) er klog.Warningf("Failed to get node %s host subnets annotations for network %s : %v", node.Name, ncc.networkName, err) } + networkID, err := util.ParseNetworkIDAnnotation(node, ncc.networkName) + if err != nil && !util.IsAnnotationNotSetError(err) { + // Log the error and try to allocate new subnets + klog.Warningf("Failed to get node %s network id annotations for network %s : %v", node.Name, ncc.networkName, err) + } + // On return validExistingSubnets will contain any valid subnets that // were already assigned to the node. 
allocatedSubnets will contain
 	// any newly allocated subnets required to ensure that the node has one subnet
@@ -218,9 +236,10 @@ func (ncc *networkClusterController) syncNodeClusterSubnet(node *corev1.Node) er
 	// 1) new node: no existing subnets and one or more new subnets were allocated
 	// 2) dual-stack to single-stack conversion: two existing subnets but only one will be valid, and no allocated subnets
 	// 3) bad subnet annotation: one or more existing subnets will be invalid and might have allocated a correct one
-	if len(existingSubnets) != len(validExistingSubnets) || len(allocatedSubnets) > 0 {
+	// Also update the node annotation if the networkID doesn't match
+	if len(existingSubnets) != len(validExistingSubnets) || len(allocatedSubnets) > 0 || ncc.networkID != networkID {
 		updatedSubnetsMap := map[string][]*net.IPNet{ncc.networkName: validExistingSubnets}
-		err = ncc.updateNodeSubnetAnnotationWithRetry(node.Name, updatedSubnetsMap)
+		err = ncc.updateNodeNetworkAnnotationsWithRetry(node.Name, updatedSubnetsMap, ncc.networkID)
 		if err != nil {
 			if errR := ncc.clusterSubnetAllocator.ReleaseNodeSubnets(node.Name, allocatedSubnets...); errR != nil {
 				klog.Warningf("Error releasing node %s subnets: %v", node.Name, errR)
@@ -284,7 +303,8 @@ func (ncc *networkClusterController) syncNodes(nodes []interface{}) error {
 	return nil
 }
 
-func (ncc *networkClusterController) updateNodeSubnetAnnotationWithRetry(nodeName string, hostSubnetsMap map[string][]*net.IPNet) error {
+// updateNodeNetworkAnnotationsWithRetry will update the node's subnet annotation and network id annotation
+func (ncc *networkClusterController) updateNodeNetworkAnnotationsWithRetry(nodeName string, hostSubnetsMap map[string][]*net.IPNet, networkId int) error {
 	// Retry if it fails because of potential conflict which is transient. Return error in the
 	// case of other errors (say temporary API server down), and it will be taken care of by the
 	// retry mechanism.
@@ -303,6 +323,12 @@ func (ncc *networkClusterController) updateNodeSubnetAnnotationWithRetry(nodeNam
 				node.Name, util.JoinIPNets(hostSubnets, ","))
 		}
 	}
+
+		cnode.Annotations, err = util.UpdateNetworkIDAnnotation(cnode.Annotations, ncc.networkName, networkId)
+		if err != nil {
+			return fmt.Errorf("failed to update node %q network id annotation to %d for network %s",
+				node.Name, networkId, ncc.networkName)
+		}
 		return ncc.kube.UpdateNode(cnode)
 	})
 	if resultErr != nil {
@@ -331,7 +357,8 @@ func (ncc *networkClusterController) Cleanup(netName string) error {
 		}
 
 		hostSubnetsMap := map[string][]*net.IPNet{ncc.networkName: nil}
-		err = ncc.updateNodeSubnetAnnotationWithRetry(node.Name, hostSubnetsMap)
+		// passing util.InvalidNetworkID deletes the network id annotation for the network.
+ err = ncc.updateNodeNetworkAnnotationsWithRetry(node.Name, hostSubnetsMap, util.InvalidNetworkID) if err != nil { return fmt.Errorf("failed to clear node %q subnet annotation for network %s", node.Name, ncc.networkName) diff --git a/go-controller/pkg/clustermanager/network_cluster_controller_test.go b/go-controller/pkg/clustermanager/network_cluster_controller_test.go index 5cc77ab408..83e54f441c 100644 --- a/go-controller/pkg/clustermanager/network_cluster_controller_test.go +++ b/go-controller/pkg/clustermanager/network_cluster_controller_test.go @@ -75,7 +75,7 @@ var _ = ginkgo.Describe("Network Cluster Controller", func() { err = f.Start() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ncc := newNetworkClusterController(ovntypes.DefaultNetworkName, config.Default.ClusterSubnets, + ncc := newNetworkClusterController(ovntypes.DefaultNetworkName, defaultNetworkID, config.Default.ClusterSubnets, fakeClient, f, false, &util.DefaultNetInfo{}, &util.DefaultNetConfInfo{}) ncc.Start(ctx.Context) defer ncc.Stop() @@ -124,7 +124,7 @@ var _ = ginkgo.Describe("Network Cluster Controller", func() { err = f.Start() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ncc := newNetworkClusterController(ovntypes.DefaultNetworkName, config.Default.ClusterSubnets, + ncc := newNetworkClusterController(ovntypes.DefaultNetworkName, defaultNetworkID, config.Default.ClusterSubnets, fakeClient, f, false, &util.DefaultNetInfo{}, &util.DefaultNetConfInfo{}) ncc.Start(ctx.Context) defer ncc.Stop() @@ -176,7 +176,7 @@ var _ = ginkgo.Describe("Network Cluster Controller", func() { err = f.Start() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ncc := newNetworkClusterController(ovntypes.DefaultNetworkName, config.Default.ClusterSubnets, + ncc := newNetworkClusterController(ovntypes.DefaultNetworkName, defaultNetworkID, config.Default.ClusterSubnets, fakeClient, f, false, &util.DefaultNetInfo{}, &util.DefaultNetConfInfo{}) ncc.Start(ctx.Context) defer ncc.Stop() diff --git a/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go b/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go index 66281d4235..f00548a348 100644 --- a/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go +++ b/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go @@ -1,6 +1,8 @@ package clustermanager import ( + "fmt" + "github.com/containernetworking/cni/pkg/types" "k8s.io/client-go/tools/record" "k8s.io/klog/v2" @@ -12,6 +14,11 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" ) +const ( + // Maximum secondary network IDs that can be generated. An arbitrary value is chosen. + maxSecondaryNetworkIDs = 4096 +) + // secondaryNetworkClusterManager object manages the multi net-attach-def controllers. // It implements networkAttachDefController.NetworkControllerManager and can be used // by NetAttachDefinitionController to add and delete NADs. 
@@ -20,16 +27,28 @@ type secondaryNetworkClusterManager struct {
 	nadController *nad.NetAttachDefinitionController
 	ovnClient     *util.OVNClusterManagerClientset
 	watchFactory  *factory.WatchFactory
+	// networkIDAllocator is used to allocate a unique ID for each secondary layer3 network
+	networkIDAllocator *idAllocator
 }
 
 func newSecondaryNetworkClusterManager(ovnClient *util.OVNClusterManagerClientset,
 	wf *factory.WatchFactory, recorder record.EventRecorder) (*secondaryNetworkClusterManager, error) {
 	klog.Infof("Creating secondary network cluster manager")
+	networkIDAllocator, err := NewIDAllocator("NetworkIDs", maxSecondaryNetworkIDs)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create an IdAllocator for the secondary network ids, err: %v", err)
+	}
+
+	// Reserve the id 0 for the default network.
+	if err := networkIDAllocator.reserveID("default", defaultNetworkID); err != nil {
+		return nil, fmt.Errorf("idAllocator failed to reserve defaultNetworkID %d", defaultNetworkID)
+	}
 	sncm := &secondaryNetworkClusterManager{
-		ovnClient:    ovnClient,
-		watchFactory: wf,
+		ovnClient:          ovnClient,
+		watchFactory:       wf,
+		networkIDAllocator: networkIDAllocator,
 	}
-	var err error
+
 	sncm.nadController, err = nad.NewNetAttachDefinitionController(
 		"cluster-manager", sncm, ovnClient.NetworkAttchDefClient, recorder)
 	if err != nil {
@@ -42,6 +61,26 @@ func newSecondaryNetworkClusterManager(ovnClient *util.OVNClusterManagerClientse
 // needed logical entities
 func (sncm *secondaryNetworkClusterManager) Start() error {
 	klog.Infof("Starting secondary network cluster manager")
+
+	// Reserve the network ids in the id allocator for the existing secondary layer3 networks.
+	nodes, err := sncm.watchFactory.GetNodes()
+	if err != nil {
+		return fmt.Errorf("error getting the nodes from the watch factory : err - %v", err)
+	}
+
+	for _, n := range nodes {
+		networkIdsMap, err := util.GetNodeNetworkIDsAnnotationNetworkIDs(n)
+		if err == nil {
+			for networkName, id := range networkIdsMap {
+				// Reserve the id for the network name. We can safely
+				// ignore any errors if there are duplicate ids or if
+				// two networks have the same id. We will resync the node
+				// annotations correctly when the network controller
+				// is created.
+				_ = sncm.networkIDAllocator.reserveID(networkName, id)
+			}
+		}
+	}
 	return sncm.nadController.Start()
 }
 
@@ -57,8 +96,13 @@ func (sncm *secondaryNetworkClusterManager) NewNetworkController(nInfo util.NetI
 	netConfInfo util.NetConfInfo) (nad.NetworkController, error) {
 	topoType := netConfInfo.TopologyType()
 	if topoType == ovntypes.Layer3Topology {
+		networkId, err := sncm.networkIDAllocator.allocateID(nInfo.GetNetworkName())
+		if err != nil {
+			return nil, fmt.Errorf("failed to create NetworkController for secondary layer3 network %s : %w", nInfo.GetNetworkName(), err)
+		}
+
 		layer3NetConfInfo := netConfInfo.(*util.Layer3NetConfInfo)
-		sncc := newNetworkClusterController(nInfo.GetNetworkName(), layer3NetConfInfo.ClusterSubnets,
+		sncc := newNetworkClusterController(nInfo.GetNetworkName(), networkId, layer3NetConfInfo.ClusterSubnets,
 			sncm.ovnClient, sncm.watchFactory, false, nInfo, netConfInfo)
 		return sncc, nil
 	}
@@ -122,6 +166,6 @@ func (sncm *secondaryNetworkClusterManager) CleanupDeletedNetworks(allController
 func (sncm *secondaryNetworkClusterManager) newDummyLayer3NetworkController(netName string) nad.NetworkController {
 	netInfo := util.NewNetInfo(&ovncnitypes.NetConf{NetConf: types.NetConf{Name: netName}, Topology: ovntypes.Layer3Topology})
 	layer3NetConfInfo := &util.Layer3NetConfInfo{}
-	return newNetworkClusterController(netInfo.GetNetworkName(), layer3NetConfInfo.ClusterSubnets,
+	return newNetworkClusterController(netInfo.GetNetworkName(), util.InvalidNetworkID, layer3NetConfInfo.ClusterSubnets,
 		sncm.ovnClient, sncm.watchFactory, false, netInfo, layer3NetConfInfo)
 }
diff --git a/go-controller/pkg/clustermanager/secondary_network_unit_test.go b/go-controller/pkg/clustermanager/secondary_network_unit_test.go
index 851321d46a..10301901be 100644
--- a/go-controller/pkg/clustermanager/secondary_network_unit_test.go
+++ b/go-controller/pkg/clustermanager/secondary_network_unit_test.go
@@ -226,7 +226,7 @@ var _ = ginkgo.Describe("Secondary Layer3 Cluster Controller Manager", func() {
 			// So testing the cleanup one at a time.
 			netInfo := util.NewNetInfo(&ovncnitypes.NetConf{NetConf: types.NetConf{Name: "blue"}, Topology: ovntypes.Layer3Topology})
 			layer3NetConfInfo := &util.Layer3NetConfInfo{}
-			oc := newNetworkClusterController(netInfo.GetNetworkName(), layer3NetConfInfo.ClusterSubnets,
+			oc := newNetworkClusterController(netInfo.GetNetworkName(), util.InvalidNetworkID, layer3NetConfInfo.ClusterSubnets,
 				sncm.ovnClient, sncm.watchFactory, false, netInfo, layer3NetConfInfo)
 
 			nadControllers := []nad.NetworkController{oc}
diff --git a/go-controller/pkg/ovn/base_network_controller_namespace.go b/go-controller/pkg/ovn/base_network_controller_namespace.go
index 7cc1cb7d34..fe817486c4 100644
--- a/go-controller/pkg/ovn/base_network_controller_namespace.go
+++ b/go-controller/pkg/ovn/base_network_controller_namespace.go
@@ -376,3 +376,43 @@ func (bnc *BaseNetworkController) createNamespaceAddrSetAllPods(ns string, ips [
 	dbIDs := getNamespaceAddrSetDbIDs(ns, bnc.controllerName)
 	return bnc.addressSetFactory.NewAddressSet(dbIDs, ips)
 }
+
+// removeRemoteZonePodFromNamespaceAddressSet tries to remove the remote zone pod ips from the pod's namespace address set.
+// Failure indicates it should be retried later.
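+// Completed pods are handled specially: their ips are removed from the address set
+// only if no other running pod has been assigned the same ips (see canReleasePodIPs).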
+func (bsnc *BaseNetworkController) removeRemoteZonePodFromNamespaceAddressSet(pod *kapi.Pod) error {
+	podDesc := fmt.Sprintf("pod %s/%s/%s", bsnc.GetNetworkName(), pod.Namespace, pod.Name)
+	podIfAddrs, err := util.GetPodCIDRsWithFullMask(pod, bsnc.NetInfo)
+	if err != nil {
+		return fmt.Errorf("failed to get pod ips for the pod %s/%s : %w", pod.Namespace, pod.Name, err)
+	}
+
+	// Remove the pod ips from the namespace address set. Before that, check if it's a completed pod and
+	// make sure that the ips are not colliding with another pod.
+	shouldRelease := true
+	if util.PodCompleted(pod) {
+		shouldRelease, err = bsnc.canReleasePodIPs(podIfAddrs)
+		if err != nil {
+			klog.Errorf("Unable to determine if completed remote pod IP is in use by another pod. "+
+				"Will not release pod %s/%s IP: %#v from namespace address set. %v", pod.Namespace, pod.Name, podIfAddrs, err)
+			shouldRelease = false
+		}
+
+		if !shouldRelease {
+			klog.Infof("Cannot release IP address: %s for %s/%s from namespace address set. Detected another pod"+
+				" using this IP", util.JoinIPNetIPs(podIfAddrs, " "), pod.Namespace, pod.Name)
+		}
+	}
+
+	if shouldRelease {
+		ops, err := bsnc.deletePodFromNamespace(pod.Namespace, podIfAddrs, "")
+		if err != nil {
+			return fmt.Errorf("failed to delete remote pod %s's IP from namespace: %w", podDesc, err)
+		}
+
+		_, err = libovsdbops.TransactAndCheck(bsnc.nbClient, ops)
+		if err != nil {
+			return fmt.Errorf("could not delete remote pod IPs from the namespace address set - %w", err)
+		}
+	}
+	return nil
+}
diff --git a/go-controller/pkg/ovn/base_network_controller_secondary.go b/go-controller/pkg/ovn/base_network_controller_secondary.go
index d8ad2ad3ab..f888df5fc3 100644
--- a/go-controller/pkg/ovn/base_network_controller_secondary.go
+++ b/go-controller/pkg/ovn/base_network_controller_secondary.go
@@ -188,6 +188,16 @@ func (bsnc *BaseSecondaryNetworkController) ensurePodForSecondaryNetwork(pod *ka
 		return nil
 	}
 
+	if bsnc.isPodScheduledinLocalZone(pod) {
+		return bsnc.ensureLocalZonePodForSecondaryNetwork(pod, addPort)
+	}
+
+	return bsnc.ensureRemoteZonePodForSecondaryNetwork(pod, addPort)
+}
+
+// ensureLocalZonePodForSecondaryNetwork tries to set up secondary network for a local zone pod. It returns nil on success and error
+// on failure; failure indicates the pod set up should be retried later.
+func (bsnc *BaseSecondaryNetworkController) ensureLocalZonePodForSecondaryNetwork(pod *kapi.Pod, addPort bool) error {
 	// If a node does not have an assigned hostsubnet don't wait for the logical switch to appear
 	switchName, err := bsnc.getExpectedSwitchName(pod)
 	if err != nil {
@@ -231,6 +241,38 @@ func (bsnc *BaseSecondaryNetworkController) ensurePodForSecondaryNetwork(pod *ka
 	return nil
 }
 
+// ensureRemoteZonePodForSecondaryNetwork tries to set up remote zone pod bits required to interconnect it.
+//   - Adds the remote pod ips to the pod namespace address set for network policy and egress gw
+//
+// It returns nil on success and error on failure; failure indicates the pod set up should be retried later.
+func (bsnc *BaseSecondaryNetworkController) ensureRemoteZonePodForSecondaryNetwork(pod *kapi.Pod, addPort bool) error {
+
+	if !bsnc.doesNetworkRequireIPAM() {
+		return nil
+	}
+
+	podIfAddrs, err := util.GetPodCIDRsWithFullMask(pod, bsnc.NetInfo)
+	if err != nil {
+		return fmt.Errorf("failed to get pod ips for the pod %s/%s : %w", pod.Namespace, pod.Name, err)
+	}
+	if len(podIfAddrs) == 0 {
+		return nil
+	}
+
+	// Ensure the namespace/nsInfo exists
+	ops, err := bsnc.addPodToNamespaceForSecondaryNetwork(pod.Namespace, podIfAddrs)
+	if err != nil {
+		return err
+	}
+
+	_, err = libovsdbops.TransactAndCheck(bsnc.nbClient, ops)
+	if err != nil {
+		return fmt.Errorf("could not add pod IPs to the namespace address set - %w", err)
+	}
+
+	return nil
+}
+
 func (bsnc *BaseSecondaryNetworkController) addLogicalPortToNetworkForNAD(pod *kapi.Pod, nadName, switchName string,
 	network *nadapi.NetworkSelectionElement) error {
 	var libovsdbExecuteTime time.Duration
@@ -283,16 +325,27 @@ func (bsnc *BaseSecondaryNetworkController) addLogicalPortToNetworkForNAD(pod *k
 	return nil
 }
 
-// removePodForSecondaryNetwork tried to tear down a for on a secondary network. It returns nil on success
-// and error on failure; failure indicates the pod tear down should be retried later.
+// removePodForSecondaryNetwork tries to tear down a pod. It returns nil on success and error on failure;
+// failure indicates the pod tear down should be retried later.
 func (bsnc *BaseSecondaryNetworkController) removePodForSecondaryNetwork(pod *kapi.Pod, portInfoMap map[string]*lpInfo) error {
-	podDesc := pod.Namespace + "/" + pod.Name
-	klog.Infof("Deleting pod: %s for network %s", podDesc, bsnc.GetNetworkName())
-
 	if util.PodWantsHostNetwork(pod) || !util.PodScheduled(pod) {
 		return nil
 	}
 
+	if bsnc.isPodScheduledinLocalZone(pod) {
+		return bsnc.removeLocalZonePodForSecondaryNetwork(pod, portInfoMap)
+	}
+
+	// For remote pods, we just need to remove the pod IPs from the pod namespace address set
+	return bsnc.removeRemoteZonePodFromNamespaceAddressSet(pod)
+}
+
+// removeLocalZonePodForSecondaryNetwork tries to tear down a local zone pod on a secondary network. It returns nil on success
+// and error on failure; failure indicates the pod tear down should be retried later.
+func (bsnc *BaseSecondaryNetworkController) removeLocalZonePodForSecondaryNetwork(pod *kapi.Pod, portInfoMap map[string]*lpInfo) error {
+	podDesc := pod.Namespace + "/" + pod.Name
+	klog.Infof("Deleting pod: %s for network %s", podDesc, bsnc.GetNetworkName())
+
 	// for a specific NAD belonging to this network, Pod's logical port might already be created half-way
 	// without its lpInfo cache being created; need to delete resources created for that NAD as well.
 	// So, first get all nadNames from pod annotation, but handle NADs that belong to this network only.
diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index b102116953..5afc3fd775 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -1292,7 +1292,7 @@ func (oc *DefaultNetworkController) deletePodEgressIPAssignments(name string, st podStatus.standbyEgressIPNames.Delete(name) return nil } - podIPs, err := util.GetPodCIDRsWithFullMask(pod) + podIPs, err := util.GetPodCIDRsWithFullMask(pod, oc.NetInfo) if err != nil { return err } @@ -2381,7 +2381,7 @@ func (e *egressIPController) addExternalGWPodSNATOps(ops []ovsdb.Operation, podN if err != nil { return nil, err } - podIPs, err := util.GetPodCIDRsWithFullMask(pod) + podIPs, err := util.GetPodCIDRsWithFullMask(pod, &util.DefaultNetInfo{}) if err != nil { return nil, err } diff --git a/go-controller/pkg/ovn/namespace.go b/go-controller/pkg/ovn/namespace.go index c3550ea1e2..281b9cc20f 100644 --- a/go-controller/pkg/ovn/namespace.go +++ b/go-controller/pkg/ovn/namespace.go @@ -87,21 +87,6 @@ func (oc *DefaultNetworkController) addRemotePodToNamespace(ns string, ips []*ne return err } -func (oc *DefaultNetworkController) deleteRemotePodFromNamespace(ns string, ips []*net.IPNet) error { - nsInfo, nsUnlock := oc.getNamespaceLocked(ns, true) - if nsInfo == nil { - return nil - } - defer nsUnlock() - - if nsInfo.addressSet != nil { - if err := nsInfo.addressSet.DeleteIPs(createIPAddressSlice(ips)); err != nil { - return err - } - } - return nil -} - func createIPAddressSlice(ips []*net.IPNet) []net.IP { ipAddrs := make([]net.IP, 0) for _, ip := range ips { diff --git a/go-controller/pkg/ovn/ovn.go b/go-controller/pkg/ovn/ovn.go index a280f06728..46e8569731 100644 --- a/go-controller/pkg/ovn/ovn.go +++ b/go-controller/pkg/ovn/ovn.go @@ -17,7 +17,6 @@ import ( egresssvc "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/egress_services" svccontroller "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/services" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/healthcheck" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -183,7 +182,7 @@ func (oc *DefaultNetworkController) ensureRemoteZonePod(oldPod, pod *kapi.Pod, a if len(pod.Status.PodIPs) < 1 { return nil } - podIfAddrs, err := util.GetPodCIDRsWithFullMask(pod) + podIfAddrs, err := util.GetPodCIDRsWithFullMask(pod, oc.NetInfo) if err != nil { return fmt.Errorf("failed to get pod ips for the pod %s/%s : %w", pod.Namespace, pod.Name, err) } @@ -255,33 +254,8 @@ func (oc *DefaultNetworkController) removeLocalZonePod(pod *kapi.Pod, portInfo * // It removes the remote pod ips from the namespace address set and if its an external gw pod, removes // its routes. func (oc *DefaultNetworkController) removeRemoteZonePod(pod *kapi.Pod) error { - podDesc := fmt.Sprintf("pod %s/%s/%s", types.DefaultNetworkName, pod.Namespace, pod.Name) - podIfAddrs, err := util.GetPodCIDRsWithFullMask(pod) - if err != nil { - return fmt.Errorf("failed to get pod ips for the pod %s/%s : %w", pod.Namespace, pod.Name, err) - } - - // Remove the pod ips from the namespace address set. Before that check if its a completed pod and - // make sure that the ips are not colliding with other pod. 
- shouldRelease := true - if util.PodCompleted(pod) { - shouldRelease, err := oc.canReleasePodIPs(podIfAddrs) - if err != nil { - klog.Errorf("Unable to determine if completed remote pod IP is in use by another pod. "+ - "Will not release pod %s/%s IP: %#v from namespace addressset. %w", pod.Namespace, pod.Name, podIfAddrs, err) - shouldRelease = false - } - - if !shouldRelease { - klog.Infof("Cannot release IP address: %s for %s/%s from namespace address set. Detected another pod"+ - " using this IP: %s/%s", util.JoinIPNetIPs(podIfAddrs, " "), pod.Namespace, pod.Name) - } - } - - if shouldRelease { - if err := oc.deleteRemotePodFromNamespace(pod.Namespace, podIfAddrs); err != nil { - return fmt.Errorf("failed to delete remote pod %s's IP from namespace: %w", podDesc, err) - } + if err := oc.removeRemoteZonePodFromNamespaceAddressSet(pod); err != nil { + return fmt.Errorf("failed to remove the remote zone pod : %w", err) } if util.PodWantsHostNetwork(pod) { @@ -290,7 +264,6 @@ func (oc *DefaultNetworkController) removeRemoteZonePod(pod *kapi.Pod) error { return fmt.Errorf("unable to delete external gateway routes for remote pod %s: %w", getPodNamespacedName(pod), err) } - return nil } return nil diff --git a/go-controller/pkg/ovn/secondary_layer3_network_controller.go b/go-controller/pkg/ovn/secondary_layer3_network_controller.go index a5a9e459d2..0ce503d097 100644 --- a/go-controller/pkg/ovn/secondary_layer3_network_controller.go +++ b/go-controller/pkg/ovn/secondary_layer3_network_controller.go @@ -11,12 +11,14 @@ import ( mnpapi "github.com/k8snetworkplumbingwg/multi-networkpolicy/pkg/apis/k8s.cni.cncf.io/v1beta1" "github.com/ovn-org/libovsdb/ovsdb" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdbops" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" lsm "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/logical_switch_manager" + zoneic "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/zone_interconnect" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/retry" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/syncmap" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" @@ -116,21 +118,27 @@ func (h *secondaryLayer3NetworkControllerEventHandler) AddResource(obj interface if !ok { return fmt.Errorf("could not cast %T object to *kapi.Node", obj) } - var nodeParams *nodeSyncs - if fromRetryLoop { - _, nodeSync := h.oc.addNodeFailed.Load(node.Name) - _, clusterRtrSync := h.oc.nodeClusterRouterPortFailed.Load(node.Name) - nodeParams = &nodeSyncs{syncNode: nodeSync, syncClusterRouterPort: clusterRtrSync} - } else { - nodeParams = &nodeSyncs{syncNode: true, syncClusterRouterPort: true} - } - if err := h.oc.addUpdateNodeEvent(node, nodeParams); err != nil { - klog.Errorf("Node add failed for %s, will try again later: %v", - node.Name, err) - return err + if h.oc.isLocalZoneNode(node) { + var nodeParams *nodeSyncs + if fromRetryLoop { + _, nodeSync := h.oc.addNodeFailed.Load(node.Name) + _, clusterRtrSync := h.oc.nodeClusterRouterPortFailed.Load(node.Name) + _, syncZoneIC := h.oc.syncZoneICFailed.Load(node.Name) + nodeParams = &nodeSyncs{syncNode: nodeSync, syncClusterRouterPort: clusterRtrSync, syncZoneIC: syncZoneIC} + } else { + nodeParams = &nodeSyncs{syncNode: true, 
syncClusterRouterPort: true, syncZoneIC: config.OVNKubernetesFeature.EnableInterconnect}
+			}
+			if err := h.oc.addUpdateLocalNodeEvent(node, nodeParams); err != nil {
+				klog.Errorf("Node add failed for %s, will try again later: %v",
+					node.Name, err)
+				return err
+			}
+		} else {
+			if err := h.oc.addUpdateRemoteNodeEvent(node, config.OVNKubernetesFeature.EnableInterconnect); err != nil {
+				return err
+			}
 		}
-
 	default:
 		return h.oc.AddSecondaryNetworkResourceCommon(h.objType, obj)
 	}
@@ -152,12 +160,27 @@ func (h *secondaryLayer3NetworkControllerEventHandler) UpdateResource(oldObj, ne
 		if !ok {
 			return fmt.Errorf("could not cast oldObj of type %T to *kapi.Node", oldObj)
 		}
-		// determine what actually changed in this update
-		_, nodeSync := h.oc.addNodeFailed.Load(newNode.Name)
-		_, failed := h.oc.nodeClusterRouterPortFailed.Load(newNode.Name)
-		clusterRtrSync := failed || nodeChassisChanged(oldNode, newNode) || nodeSubnetChanged(oldNode, newNode)
+		if h.oc.isLocalZoneNode(newNode) {
+			var nodeSyncsParam *nodeSyncs
+			if h.oc.isLocalZoneNode(oldNode) {
+				// determine what actually changed in this update
+				_, nodeSync := h.oc.addNodeFailed.Load(newNode.Name)
+				_, failed := h.oc.nodeClusterRouterPortFailed.Load(newNode.Name)
+				clusterRtrSync := failed || nodeChassisChanged(oldNode, newNode) || nodeSubnetChanged(oldNode, newNode)
+				_, syncZoneIC := h.oc.syncZoneICFailed.Load(newNode.Name)
+				syncZoneIC = syncZoneIC || util.NodeNetworkIDAnnotationChanged(oldNode, newNode, h.oc.GetNetworkName())
+				nodeSyncsParam = &nodeSyncs{syncNode: nodeSync, syncClusterRouterPort: clusterRtrSync, syncZoneIC: syncZoneIC}
+			} else {
+				klog.Infof("Node %s moved from the remote zone %s to the local zone %s.",
+					newNode.Name, util.GetNodeZone(oldNode), util.GetNodeZone(newNode))
+				// The node is now a local zone node. Trigger a full node sync.
+				nodeSyncsParam = &nodeSyncs{syncNode: true, syncClusterRouterPort: true, syncZoneIC: config.OVNKubernetesFeature.EnableInterconnect}
+			}
 
-		return h.oc.addUpdateNodeEvent(newNode, &nodeSyncs{syncNode: nodeSync, syncClusterRouterPort: clusterRtrSync})
+			return h.oc.addUpdateLocalNodeEvent(newNode, nodeSyncsParam)
+		} else {
+			return h.oc.addUpdateRemoteNodeEvent(newNode, config.OVNKubernetesFeature.EnableInterconnect)
+		}
 	default:
 		return h.oc.UpdateSecondaryNetworkResourceCommon(h.objType, oldObj, newObj, inRetryCache)
 	}
@@ -189,7 +212,7 @@ func (h *secondaryLayer3NetworkControllerEventHandler) SyncFunc(objs []interface
 	} else {
 		switch h.objType {
 		case factory.PodType:
-			syncFunc = h.oc.syncPodsForSecondaryNetwork
+			syncFunc = h.oc.syncPods
 
 		case factory.NodeType:
 			syncFunc = h.oc.syncNodes
@@ -224,6 +247,9 @@ type SecondaryLayer3NetworkController struct {
 	// Node-specific syncMaps used by node event handler
 	addNodeFailed               sync.Map
 	nodeClusterRouterPortFailed sync.Map
+	syncZoneICFailed            sync.Map
+
+	zoneICHandler *zoneic.ZoneInterconnectHandler
 }
 
 // NewSecondaryLayer3NetworkController creates a new OVN controller for the given secondary layer3 NAD
@@ -231,6 +257,10 @@ func NewSecondaryLayer3NetworkController(cnci *CommonNetworkControllerInfo, netI
 	netconfInfo util.NetConfInfo, addressSetFactory addressset.AddressSetFactory) *SecondaryLayer3NetworkController {
 	stopChan := make(chan struct{})
 	ipv4Mode, ipv6Mode := netconfInfo.IPMode()
+	var zoneICHandler *zoneic.ZoneInterconnectHandler
+	if config.OVNKubernetesFeature.EnableInterconnect {
+		zoneICHandler = zoneic.NewZoneInterconnectHandler(netInfo, cnci.nbClient, cnci.sbClient)
+	}
 	// controllerName must be unique to identify db object owned by given controller
 	if addressSetFactory == nil {
 		addressSetFactory = addressset.NewOvnAddressSetFactory(cnci.nbClient, ipv4Mode, ipv6Mode)
@@ -252,10 +282,13 @@ func NewSecondaryLayer3NetworkController(cnci *CommonNetworkControllerInfo, netI
 			podSelectorAddressSets: syncmap.NewSyncMap[*PodSelectorAddressSet](),
 			stopChan:               stopChan,
 			wg:                     &sync.WaitGroup{},
+			localZoneNodes:         &sync.Map{},
 			},
 		},
 		addNodeFailed:               sync.Map{},
 		nodeClusterRouterPortFailed: sync.Map{},
+		syncZoneICFailed:            sync.Map{},
+		zoneICHandler:               zoneICHandler,
 	}
 	// disable multicast support for secondary networks
 	// TBD: changes need to be made to support multicast in secondary networks
@@ -368,6 +401,12 @@ func (oc *SecondaryLayer3NetworkController) Cleanup(netName string) error {
 	if err != nil {
 		return fmt.Errorf("failed to delete routers/switches of network %s: %v", netName, err)
 	}
+
+	if config.OVNKubernetesFeature.EnableInterconnect {
+		if err = oc.zoneICHandler.Cleanup(); err != nil {
+			return fmt.Errorf("failed to delete interconnect transit switch of network %s: %v", netName, err)
+		}
+	}
 	return nil
 }
 
@@ -422,11 +461,13 @@ func (oc *SecondaryLayer3NetworkController) Init() error {
 	return err
 }
 
-func (oc *SecondaryLayer3NetworkController) addUpdateNodeEvent(node *kapi.Node, nSyncs *nodeSyncs) error {
+func (oc *SecondaryLayer3NetworkController) addUpdateLocalNodeEvent(node *kapi.Node, nSyncs *nodeSyncs) error {
 	var hostSubnets []*net.IPNet
 	var errs []error
 	var err error
 
+	_, _ = oc.localZoneNodes.LoadOrStore(node.Name, true)
+
 	if noHostSubnet := util.NoHostSubnet(node); noHostSubnet {
 		err := oc.lsManager.AddNoHostSubnetSwitch(oc.GetNetworkScopedName(node.Name))
 		if err != nil {
@@ -440,6 +481,7 @@
 	if hostSubnets, err = oc.addNode(node); err != nil {
oc.addNodeFailed.Store(node.Name, true) oc.nodeClusterRouterPortFailed.Store(node.Name, true) + oc.syncZoneICFailed.Store(node.Name, true) err = fmt.Errorf("nodeAdd: error adding node %q for network %s: %w", node.Name, oc.GetNetworkName(), err) oc.recordNodeErrorEvent(node, err) return err @@ -462,6 +504,15 @@ func (oc *SecondaryLayer3NetworkController) addUpdateNodeEvent(node *kapi.Node, errs = append(errs, errors...) } + if nSyncs.syncZoneIC && config.OVNKubernetesFeature.EnableInterconnect { + if err := oc.zoneICHandler.AddLocalZoneNode(node); err != nil { + errs = append(errs, err) + oc.syncZoneICFailed.Store(node.Name, true) + } else { + oc.syncZoneICFailed.Delete(node.Name) + } + } + err = kerrors.NewAggregate(errs) if err != nil { oc.recordNodeErrorEvent(node, err) @@ -469,6 +520,27 @@ func (oc *SecondaryLayer3NetworkController) addUpdateNodeEvent(node *kapi.Node, return err } +func (oc *SecondaryLayer3NetworkController) addUpdateRemoteNodeEvent(node *kapi.Node, syncZoneIc bool) error { + _, present := oc.localZoneNodes.Load(node.Name) + + if present { + if err := oc.deleteNodeEvent(node); err != nil { + return err + } + } + + var err error + if syncZoneIc && config.OVNKubernetesFeature.EnableInterconnect { + if err = oc.zoneICHandler.AddRemoteZoneNode(node); err != nil { + err = fmt.Errorf("failed to add the remote zone node [%s] to the zone interconnect handler, err : %v", node.Name, err) + oc.syncZoneICFailed.Store(node.Name, true) + } else { + oc.syncZoneICFailed.Delete(node.Name) + } + } + return err +} + func (oc *SecondaryLayer3NetworkController) addNode(node *kapi.Node) ([]*net.IPNet, error) { // Node subnet for the secondary layer3 network is allocated by cluster manager. // Make sure that the node is allocated with the subnet before proceeding @@ -493,9 +565,17 @@ func (oc *SecondaryLayer3NetworkController) deleteNodeEvent(node *kapi.Node) err return err } + oc.localZoneNodes.Delete(node.Name) + oc.lsManager.DeleteSwitch(oc.GetNetworkScopedName(node.Name)) oc.addNodeFailed.Delete(node.Name) oc.nodeClusterRouterPortFailed.Delete(node.Name) + if config.OVNKubernetesFeature.EnableInterconnect { + if err := oc.zoneICHandler.DeleteNode(node); err != nil { + return err + } + oc.syncZoneICFailed.Delete(node.Name) + } return nil } @@ -522,7 +602,11 @@ func (oc *SecondaryLayer3NetworkController) syncNodes(nodes []interface{}) error continue } - foundNodes.Insert(node.Name) + // Add the node to the foundNodes only if it belongs to the local zone. + if oc.isLocalZoneNode(node) { + foundNodes.Insert(node.Name) + oc.localZoneNodes.Store(node.Name, true) + } } p := func(item *nbdb.LogicalSwitch) bool { @@ -544,5 +628,29 @@ func (oc *SecondaryLayer3NetworkController) syncNodes(nodes []interface{}) error } } } + + if config.OVNKubernetesFeature.EnableInterconnect { + if err := oc.zoneICHandler.SyncNodes(nodes); err != nil { + return fmt.Errorf("zoneICHandler failed to sync nodes: error: %w", err) + } + } + return nil } + +// syncPods syncs the pods for layer3 secondary networks. 
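+// Only the pods scheduled in the local zone are handed to syncPodsForSecondaryNetwork;
+// remote zone pods are skipped here since, for those, this controller only manages the
+// pod ips in the namespace address sets (see ensureRemoteZonePodForSecondaryNetwork).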
+func (oc *SecondaryLayer3NetworkController) syncPods(pods []interface{}) error {
+	localZonePodIfaces := make([]interface{}, 0, len(pods))
+	// Exclude the remote zone pods and call syncPodsForSecondaryNetwork
+	for _, podInterface := range pods {
+		pod, ok := podInterface.(*kapi.Pod)
+		if !ok {
+			return fmt.Errorf("spurious object in syncPods: %v", podInterface)
+		}
+		if oc.isPodScheduledinLocalZone(pod) {
+			localZonePodIfaces = append(localZonePodIfaces, podInterface)
+		}
+	}
+
+	return oc.syncPodsForSecondaryNetwork(localZonePodIfaces)
+}
diff --git a/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler.go b/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler.go
index 0af8c4c64d..45c3b13d02 100644
--- a/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler.go
+++ b/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler.go
@@ -21,10 +21,11 @@ import (
 )
 
 const (
-	transitSwitchTunnelKey = "16711683"
-	lportTypeRouter        = "router"
-	lportTypeRouterAddr    = "router"
-	lportTypeRemote        = "remote"
+	lportTypeRouter     = "router"
+	lportTypeRouterAddr = "router"
+	lportTypeRemote     = "remote"
+
+	BaseTransitSwitchTunnelKey = 16711683
 )
 
 /*
@@ -235,6 +236,12 @@ func (zic *ZoneInterconnectHandler) SyncNodes(kNodes []interface{}) error {
 	return nil
 }
 
+// Cleanup deletes the transit switch for the network
+func (zic *ZoneInterconnectHandler) Cleanup() error {
+	klog.Infof("Deleting the transit switch %s for the network %s", zic.networkTransitSwitchName, zic.GetNetworkName())
+	return libovsdbops.DeleteLogicalSwitch(zic.nbClient, zic.networkTransitSwitchName)
+}
+
 // createLocalZoneNodeResources creates the local zone node resources for interconnect
 //   - creates Transit switch if it doesn't yet exist
 //   - creates a logical switch port of type "router" in the transit switch with the name as - .tstor-
@@ -249,17 +256,23 @@ func (zic *ZoneInterconnectHandler) createLocalZoneNodeResources(node *corev1.No
 		return fmt.Errorf("failed to get the node transit switch port ips for node %s: %w", node.Name, err)
 	}
 
+	networkId, err := util.ParseNetworkIDAnnotation(node, zic.GetNetworkName())
+	if err != nil {
+		return fmt.Errorf("failed to get the network id for the network %s on node %s: %v", zic.GetNetworkName(), node.Name, err)
+	}
+
 	transitRouterPortMac := util.IPAddrToHWAddr(nodeTransitSwitchPortIPs[0].IP)
 	var transitRouterPortNetworks []string
 	for _, ip := range nodeTransitSwitchPortIPs {
 		transitRouterPortNetworks = append(transitRouterPortNetworks, ip.String())
 	}
 
+	transitSwitchTunnelKey := BaseTransitSwitchTunnelKey + networkId
 	ts := &nbdb.LogicalSwitch{
 		Name: zic.networkTransitSwitchName,
 		OtherConfig: map[string]string{
 			"interconn-ts":             zic.networkTransitSwitchName,
-			"requested-tnl-key":        transitSwitchTunnelKey,
+			"requested-tnl-key":
strconv.Itoa(transitSwitchTunnelKey), "mcast_snoop": "true", "mcast_flood_unregistered": "true", }, diff --git a/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler_test.go b/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler_test.go index 2b8a3e6931..9d77499f56 100644 --- a/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler_test.go +++ b/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler_test.go @@ -42,6 +42,9 @@ const ( // ovnNodeSubnetsAnnotation is the node annotation name to store the node subnets. ovnNodeSubnetsAnnotation = "k8s.ovn.org/node-subnets" + + // ovnNodeNetworkIDsAnnotation is the node annotation name to store the network ids. + ovnNodeNetworkIDsAnnotation = "k8s.ovn.org/network-ids" ) func newClusterJoinSwitch() *nbdb.LogicalSwitch { @@ -302,6 +305,7 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { ovnNodeSubnetsAnnotation: "{\"default\":[\"10.244.2.0/24\"]}", ovnTransitSwitchPortAddrAnnotation: "{\"ipv4\":\"168.254.0.2/16\"}", ovnNodeGRLRPAddrAnnotaton: "{\"ipv4\":\"100.64.0.2/16\"}", + ovnNodeNetworkIDsAnnotation: "{\"default\":\"0\"}", }, }, Status: corev1.NodeStatus{ @@ -319,6 +323,7 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { ovnNodeSubnetsAnnotation: "{\"default\":[\"10.244.3.0/24\"]}", ovnTransitSwitchPortAddrAnnotation: "{\"ipv4\":\"168.254.0.3/16\"}", ovnNodeGRLRPAddrAnnotaton: "{\"ipv4\":\"100.64.0.3/16\"}", + ovnNodeNetworkIDsAnnotation: "{\"default\":\"0\"}", }, }, Status: corev1.NodeStatus{ @@ -336,6 +341,7 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { ovnNodeSubnetsAnnotation: "{\"default\":[\"10.244.4.0/24\"]}", ovnTransitSwitchPortAddrAnnotation: "{\"ipv4\":\"168.254.0.4/16\"}", ovnNodeGRLRPAddrAnnotaton: "{\"ipv4\":\"100.64.0.4/16\"}", + ovnNodeNetworkIDsAnnotation: "{\"default\":\"0\"}", }, }, Status: corev1.NodeStatus{ @@ -502,6 +508,7 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { ovnNodeSubnetsAnnotation: "{\"blue\":[\"10.244.2.0/24\"]}", ovnTransitSwitchPortAddrAnnotation: "{\"ipv4\":\"168.254.0.2/16\"}", ovnNodeGRLRPAddrAnnotaton: "{\"ipv4\":\"100.64.0.2/16\"}", + ovnNodeNetworkIDsAnnotation: "{\"blue\":\"1\"}", }, }, Status: corev1.NodeStatus{ @@ -519,6 +526,7 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { ovnNodeSubnetsAnnotation: "{\"blue\":[\"10.244.3.0/24\"]}", ovnTransitSwitchPortAddrAnnotation: "{\"ipv4\":\"168.254.0.3/16\"}", ovnNodeGRLRPAddrAnnotaton: "{\"ipv4\":\"100.64.0.3/16\"}", + ovnNodeNetworkIDsAnnotation: "{\"blue\":\"1\"}", }, }, Status: corev1.NodeStatus{ @@ -536,6 +544,7 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { ovnNodeSubnetsAnnotation: "{\"blue\":[\"10.244.4.0/24\"]}", ovnTransitSwitchPortAddrAnnotation: "{\"ipv4\":\"168.254.0.4/16\"}", ovnNodeGRLRPAddrAnnotaton: "{\"ipv4\":\"100.64.0.4/16\"}", + ovnNodeNetworkIDsAnnotation: "{\"blue\":\"1\"}", }, }, Status: corev1.NodeStatus{ @@ -681,6 +690,11 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { // Set the node transit switch port ips testNode4.Annotations[ovnTransitSwitchPortAddrAnnotation] = "{\"ipv4\":\"168.254.0.5/16\"}" err = zoneICHandler.AddLocalZoneNode(&testNode4) + gomega.Expect(err).To(gomega.HaveOccurred(), "failed to get the network id for the network default on node node4") + + // Set the network id for default network + testNode4.Annotations[ovnNodeNetworkIDsAnnotation] = "{\"default\":\"0\"}" + err = zoneICHandler.AddLocalZoneNode(&testNode4) gomega.Expect(err).To(gomega.HaveOccurred(), "failed to 
create/update cluster router ovn_cluster_router to add transit switch port rtots-node4 for the node node4")
 
 			// Create the cluster router
@@ -769,6 +783,11 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() {
 			// Set the node transit switch port ips
 			testNode4.Annotations[ovnTransitSwitchPortAddrAnnotation] = "{\"ipv4\":\"168.254.0.5/16\"}"
 			err = zoneICHandler.AddRemoteZoneNode(&testNode4)
+			gomega.Expect(err).To(gomega.HaveOccurred(), "failed to get the network id for the network default on node node4")
+
+			// Set the network id for default network
+			testNode4.Annotations[ovnNodeNetworkIDsAnnotation] = "{\"default\":\"0\"}"
+			err = zoneICHandler.AddRemoteZoneNode(&testNode4)
 			gomega.Expect(err).To(gomega.HaveOccurred(), "failed to update chassis node4 for remote port tstor-node4")
 
 			// Create remote chassis
diff --git a/go-controller/pkg/util/node_annotations.go b/go-controller/pkg/util/node_annotations.go
index c5c28cffdd..dac7a6c9b6 100644
--- a/go-controller/pkg/util/node_annotations.go
+++ b/go-controller/pkg/util/node_annotations.go
@@ -90,6 +90,13 @@ const (
 	// InvalidNodeID indicates an invalid node id
 	InvalidNodeID = -1
+
+	// ovnNetworkIDs is the constant string representing the ids allocated for the
+	// default network and other layer3 secondary networks by cluster manager.
+	ovnNetworkIDs = "k8s.ovn.org/network-ids"
+
+	// InvalidNetworkID signifies an invalid network id
+	InvalidNetworkID = -1
 )
 
 type L3GatewayConfig struct {
@@ -698,3 +705,124 @@ func GetNodeZone(node *kapi.Node) string {
 func NodeZoneAnnotationChanged(oldNode, newNode *corev1.Node) bool {
 	return oldNode.Annotations[ovnNodeZoneName] != newNode.Annotations[ovnNodeZoneName]
 }
+
+func parseNetworkIDsAnnotation(nodeAnnotations map[string]string, annotationName string) (map[string]string, error) {
+	annotation, ok := nodeAnnotations[annotationName]
+	if !ok {
+		return nil, newAnnotationNotSetError("could not find %q annotation", annotationName)
+	}
+
+	networkIdsStrMap := map[string]string{}
+	networkIds := make(map[string]string)
+	if err := json.Unmarshal([]byte(annotation), &networkIds); err != nil {
+		return nil, fmt.Errorf("could not parse %q annotation %q : %v",
+			annotationName, annotation, err)
+	}
+	for netName, v := range networkIds {
+		networkIdsStrMap[netName] = v
+	}
+
+	if len(networkIdsStrMap) == 0 {
+		return nil, fmt.Errorf("unexpected empty %s annotation", annotationName)
+	}
+
+	return networkIdsStrMap, nil
+}
+
+// ParseNetworkIDAnnotation parses the 'ovnNetworkIDs' annotation for the specified
+// network in 'netName' and returns the network id.
+func ParseNetworkIDAnnotation(node *kapi.Node, netName string) (int, error) {
+	networkIDsMap, err := parseNetworkIDsAnnotation(node.Annotations, ovnNetworkIDs)
+	if err != nil {
+		return InvalidNetworkID, err
+	}
+
+	networkID, ok := networkIDsMap[netName]
+	if !ok {
+		return InvalidNetworkID, newAnnotationNotSetError("node %q has no %q annotation for network %s", node.Name, ovnNetworkIDs, netName)
+	}
+
+	return strconv.Atoi(networkID)
+}
+
+// updateNetworkIDsAnnotation updates the ovnNetworkIDs annotation in the 'annotations' map
+// with the provided network id in 'networkID'. If 'networkID' is InvalidNetworkID (-1)
+// it deletes the ovnNetworkIDs annotation from the map.
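+// For example, with an annotation value of {"default":"0"}, updating network
+// "blue" to id 1 yields {"default":"0","blue":"1"}; a subsequent call for "blue"
+// with InvalidNetworkID restores {"default":"0"}.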
+func updateNetworkIDsAnnotation(annotations map[string]string, netName string, networkID int) error { + var bytes []byte + + // First get the all network ids for all existing networks + networkIDsMap, err := parseNetworkIDsAnnotation(annotations, ovnNetworkIDs) + if err != nil { + if !IsAnnotationNotSetError(err) { + return fmt.Errorf("failed to parse node network id annotation %q: %v", + annotations, err) + } + // in the case that the annotation does not exist + networkIDsMap = map[string]string{} + } + + // add or delete network id of the specified network + if networkID == InvalidNetworkID { + delete(networkIDsMap, netName) + } else { + networkIDsMap[netName] = strconv.Itoa(networkID) + } + + // if no networks left, just delete the network ids annotation from node annotations. + if len(networkIDsMap) == 0 { + delete(annotations, ovnNetworkIDs) + return nil + } + + // Marshal all network ids back to annotations. + networkIdsStrMap := make(map[string]string) + for n, id := range networkIDsMap { + networkIdsStrMap[n] = id + } + bytes, err = json.Marshal(networkIdsStrMap) + if err != nil { + return err + } + annotations[ovnNetworkIDs] = string(bytes) + return nil +} + +// UpdateNetworkIDAnnotation updates the ovnNetworkIDs annotation for the network name 'netName' with the network id 'networkID'. +// If 'networkID' is invalid network ID (-1), then it deletes that network from the network ids annotation. +func UpdateNetworkIDAnnotation(annotations map[string]string, netName string, networkID int) (map[string]string, error) { + if annotations == nil { + annotations = map[string]string{} + } + err := updateNetworkIDsAnnotation(annotations, netName, networkID) + if err != nil { + return nil, err + } + return annotations, nil +} + +// GetNodeNetworkIDsAnnotationNetworkIDs parses the "k8s.ovn.org/network-ids" annotation +// on a node and returns the map of network name and ids. 
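+// For example, a node annotated with {"default":"0","blue":"1"} yields
+// map[string]int{"default": 0, "blue": 1}. Entries whose value is not an
+// integer are silently skipped.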
+func GetNodeNetworkIDsAnnotationNetworkIDs(node *kapi.Node) (map[string]int, error) {
+	networkIDsStrMap, err := parseNetworkIDsAnnotation(node.Annotations, ovnNetworkIDs)
+	if err != nil {
+		return nil, err
+	}
+
+	networkIDsMap := map[string]int{}
+	for netName, v := range networkIDsStrMap {
+		id, e := strconv.Atoi(v)
+		if e == nil {
+			networkIDsMap[netName] = id
+		}
+	}
+
+	return networkIDsMap, nil
+}
+
+// NodeNetworkIDAnnotationChanged returns true if the network id for netName in the
+// ovnNetworkIDs annotation differs between the two nodes
+func NodeNetworkIDAnnotationChanged(oldNode, newNode *corev1.Node, netName string) bool {
+	oldNodeNetID, _ := ParseNetworkIDAnnotation(oldNode, netName)
+	newNodeNetID, _ := ParseNetworkIDAnnotation(newNode, netName)
+	return oldNodeNetID != newNodeNetID
+}
diff --git a/go-controller/pkg/util/pod_annotation.go b/go-controller/pkg/util/pod_annotation.go
index f21efe106b..2f2e2ed635 100644
--- a/go-controller/pkg/util/pod_annotation.go
+++ b/go-controller/pkg/util/pod_annotation.go
@@ -251,8 +251,8 @@ func UnmarshalPodAnnotationAllNetworks(annotations map[string]string) (map[strin
 
 // GetPodCIDRsWithFullMask returns the pod's IP addresses in a CIDR with FullMask format
 // Internally it calls GetPodIPsOfNetwork
-func GetPodCIDRsWithFullMask(pod *v1.Pod) ([]*net.IPNet, error) {
-	podIPs, err := GetPodIPsOfNetwork(pod, &DefaultNetInfo{})
+func GetPodCIDRsWithFullMask(pod *v1.Pod, nInfo NetInfo) ([]*net.IPNet, error) {
+	podIPs, err := GetPodIPsOfNetwork(pod, nInfo)
 	if err != nil {
 		return nil, err
 	}
diff --git a/go-controller/pkg/util/pod_annotation_unit_test.go b/go-controller/pkg/util/pod_annotation_unit_test.go
index 763853030c..1041594acb 100644
--- a/go-controller/pkg/util/pod_annotation_unit_test.go
+++ b/go-controller/pkg/util/pod_annotation_unit_test.go
@@ -290,7 +290,7 @@ func TestGetPodIPsOfNetwork(t *testing.T) {
 			} else {
 				assert.Equal(t, tc.outExp, res1)
 			}
-			res2, e := GetPodCIDRsWithFullMask(tc.inpPod)
+			res2, e := GetPodCIDRsWithFullMask(tc.inpPod, &DefaultNetInfo{})
 			t.Log(res2, e)
 			if tc.errAssert {
 				assert.Error(t, e)

From 06cdd00ee17cdc0d5dbfd6dd2facb4affc9133a4 Mon Sep 17 00:00:00 2001
From: Numan Siddique
Date: Wed, 15 Mar 2023 16:52:37 -0400
Subject: [PATCH 11/90] Disable Secondary networks - layer2 and localnet when
 Interconnect feature is enabled.

Interconnect feature still doesn't support interconnecting these
networks with multiple zones. It's not an inherent limitation of the
interconnect feature, and upcoming patches will add this support.

Signed-off-by: Numan Siddique
---
 docs/multi-homing.md                          | 3 +++
 .../network_controller_manager.go             | 6 ++++++
 2 files changed, 9 insertions(+)

diff --git a/docs/multi-homing.md b/docs/multi-homing.md
index 03a9e2ebbe..c1552f07ab 100644
--- a/docs/multi-homing.md
+++ b/docs/multi-homing.md
@@ -116,6 +116,7 @@ spec:
   network will only provide layer 2 communication, and the users must configure IPs
   for the pods. Port security will only prevent MAC spoofing.
 - switched - layer2 - secondary networks **only** allow for east/west traffic.
+- this topology is not supported when the Interconnect feature is enabled with multiple zones.
 
 ### Switched - localnet - topology
 This topology interconnects the workloads via a cluster-wide logical switch to
@@ -166,6 +167,7 @@ localnet network.
 - when the subnets attribute is omitted, the logical switch implementing the
   network will only provide layer 2 communication, and the users must configure IPs
   for the pods. Port security will only prevent MAC spoofing.
+- this topology is not supported when the Interconnect feature is enabled with multiple zones. ## Pod configuration The user must specify the secondary network attachments via the @@ -237,3 +239,4 @@ OVN-K currently does **not** support: - the same attachment configured multiple times in the same pod - i.e. `k8s.v1.cni.cncf.io/networks: l3-network,l3-network` is invalid. - updates to the network selection elements lists - i.e. `k8s.v1.cni.cncf.io/networks` annotation +- layer2 and localnet secondary networks when the Interconnect feature is enabled with multiple zones. diff --git a/go-controller/pkg/network-controller-manager/network_controller_manager.go b/go-controller/pkg/network-controller-manager/network_controller_manager.go index ff226e00f1..15388585be 100644 --- a/go-controller/pkg/network-controller-manager/network_controller_manager.go +++ b/go-controller/pkg/network-controller-manager/network_controller_manager.go @@ -71,8 +71,14 @@ func (cm *networkControllerManager) NewNetworkController(nInfo util.NetInfo, case ovntypes.Layer3Topology: return ovn.NewSecondaryLayer3NetworkController(cnci, nInfo, netConfInfo, nil), nil case ovntypes.Layer2Topology: + if config.OVNKubernetesFeature.EnableInterconnect { + return nil, fmt.Errorf("topology type %s not supported when Interconnect feature is enabled", topoType) + } return ovn.NewSecondaryLayer2NetworkController(cnci, nInfo, netConfInfo, nil), nil case ovntypes.LocalnetTopology: + if config.OVNKubernetesFeature.EnableInterconnect { + return nil, fmt.Errorf("topology type %s not supported when Interconnect feature is enabled", topoType) + } return ovn.NewSecondaryLocalnetNetworkController(cnci, nInfo, netConfInfo, nil), nil } return nil, fmt.Errorf("topology type %s not supported", topoType) From 98b0b1fa35f0f75fd579b0c76761af5ba70e810d Mon Sep 17 00:00:00 2001 From: Balazs Nemeth Date: Fri, 28 Apr 2023 10:25:18 +0200 Subject: [PATCH 12/90] Regenerate mockery changes for SriovnetOps Run the latest mockery to regenerate the mocked version of SriovnetOps using the following command after changing directory to ./go-controller/pkg/util/ (for future reference): ~/go/bin/mockery --name=SriovnetOps Signed-off-by: Balazs Nemeth --- go-controller/pkg/util/mocks/SriovnetOps.go | 71 ++++++++++++++++----- 1 file changed, 55 insertions(+), 16 deletions(-) diff --git a/go-controller/pkg/util/mocks/SriovnetOps.go b/go-controller/pkg/util/mocks/SriovnetOps.go index 609cac1379..ddf04c8bca 100644 --- a/go-controller/pkg/util/mocks/SriovnetOps.go +++ b/go-controller/pkg/util/mocks/SriovnetOps.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.12.3. DO NOT EDIT.
package mocks @@ -20,6 +20,10 @@ func (_m *SriovnetOps) GetNetDevicesFromAux(auxDev string) ([]string, error) { ret := _m.Called(auxDev) var r0 []string + var r1 error + if rf, ok := ret.Get(0).(func(string) ([]string, error)); ok { + return rf(auxDev) + } if rf, ok := ret.Get(0).(func(string) []string); ok { r0 = rf(auxDev) } else { @@ -28,7 +32,6 @@ func (_m *SriovnetOps) GetNetDevicesFromAux(auxDev string) ([]string, error) { } } - var r1 error if rf, ok := ret.Get(1).(func(string) error); ok { r1 = rf(auxDev) } else { @@ -43,6 +46,10 @@ func (_m *SriovnetOps) GetNetDevicesFromPci(pciAddress string) ([]string, error) ret := _m.Called(pciAddress) var r0 []string + var r1 error + if rf, ok := ret.Get(0).(func(string) ([]string, error)); ok { + return rf(pciAddress) + } if rf, ok := ret.Get(0).(func(string) []string); ok { r0 = rf(pciAddress) } else { @@ -51,7 +58,6 @@ func (_m *SriovnetOps) GetNetDevicesFromPci(pciAddress string) ([]string, error) } } - var r1 error if rf, ok := ret.Get(1).(func(string) error); ok { r1 = rf(pciAddress) } else { @@ -66,13 +72,16 @@ func (_m *SriovnetOps) GetPfPciFromAux(auxDev string) (string, error) { ret := _m.Called(auxDev) var r0 string + var r1 error + if rf, ok := ret.Get(0).(func(string) (string, error)); ok { + return rf(auxDev) + } if rf, ok := ret.Get(0).(func(string) string); ok { r0 = rf(auxDev) } else { r0 = ret.Get(0).(string) } - var r1 error if rf, ok := ret.Get(1).(func(string) error); ok { r1 = rf(auxDev) } else { @@ -87,13 +96,16 @@ func (_m *SriovnetOps) GetPfPciFromVfPci(vfPciAddress string) (string, error) { ret := _m.Called(vfPciAddress) var r0 string + var r1 error + if rf, ok := ret.Get(0).(func(string) (string, error)); ok { + return rf(vfPciAddress) + } if rf, ok := ret.Get(0).(func(string) string); ok { r0 = rf(vfPciAddress) } else { r0 = ret.Get(0).(string) } - var r1 error if rf, ok := ret.Get(1).(func(string) error); ok { r1 = rf(vfPciAddress) } else { @@ -108,6 +120,10 @@ func (_m *SriovnetOps) GetRepresentorPeerMacAddress(netdev string) (net.Hardware ret := _m.Called(netdev) var r0 net.HardwareAddr + var r1 error + if rf, ok := ret.Get(0).(func(string) (net.HardwareAddr, error)); ok { + return rf(netdev) + } if rf, ok := ret.Get(0).(func(string) net.HardwareAddr); ok { r0 = rf(netdev) } else { @@ -116,7 +132,6 @@ func (_m *SriovnetOps) GetRepresentorPeerMacAddress(netdev string) (net.Hardware } } - var r1 error if rf, ok := ret.Get(1).(func(string) error); ok { r1 = rf(netdev) } else { @@ -131,13 +146,16 @@ func (_m *SriovnetOps) GetRepresentorPortFlavour(netdev string) (sriovnet.PortFl ret := _m.Called(netdev) var r0 sriovnet.PortFlavour + var r1 error + if rf, ok := ret.Get(0).(func(string) (sriovnet.PortFlavour, error)); ok { + return rf(netdev) + } if rf, ok := ret.Get(0).(func(string) sriovnet.PortFlavour); ok { r0 = rf(netdev) } else { r0 = ret.Get(0).(sriovnet.PortFlavour) } - var r1 error if rf, ok := ret.Get(1).(func(string) error); ok { r1 = rf(netdev) } else { @@ -152,13 +170,16 @@ func (_m *SriovnetOps) GetSfIndexByAuxDev(auxDev string) (int, error) { ret := _m.Called(auxDev) var r0 int + var r1 error + if rf, ok := ret.Get(0).(func(string) (int, error)); ok { + return rf(auxDev) + } if rf, ok := ret.Get(0).(func(string) int); ok { r0 = rf(auxDev) } else { r0 = ret.Get(0).(int) } - var r1 error if rf, ok := ret.Get(1).(func(string) error); ok { r1 = rf(auxDev) } else { @@ -173,13 +194,16 @@ func (_m *SriovnetOps) GetSfRepresentor(uplink string, sfIndex int) (string, err ret := _m.Called(uplink, sfIndex) var r0 
string + var r1 error + if rf, ok := ret.Get(0).(func(string, int) (string, error)); ok { + return rf(uplink, sfIndex) + } if rf, ok := ret.Get(0).(func(string, int) string); ok { r0 = rf(uplink, sfIndex) } else { r0 = ret.Get(0).(string) } - var r1 error if rf, ok := ret.Get(1).(func(string, int) error); ok { r1 = rf(uplink, sfIndex) } else { @@ -194,13 +218,16 @@ func (_m *SriovnetOps) GetUplinkRepresentor(vfPciAddress string) (string, error) ret := _m.Called(vfPciAddress) var r0 string + var r1 error + if rf, ok := ret.Get(0).(func(string) (string, error)); ok { + return rf(vfPciAddress) + } if rf, ok := ret.Get(0).(func(string) string); ok { r0 = rf(vfPciAddress) } else { r0 = ret.Get(0).(string) } - var r1 error if rf, ok := ret.Get(1).(func(string) error); ok { r1 = rf(vfPciAddress) } else { @@ -215,13 +242,16 @@ func (_m *SriovnetOps) GetUplinkRepresentorFromAux(auxDev string) (string, error ret := _m.Called(auxDev) var r0 string + var r1 error + if rf, ok := ret.Get(0).(func(string) (string, error)); ok { + return rf(auxDev) + } if rf, ok := ret.Get(0).(func(string) string); ok { r0 = rf(auxDev) } else { r0 = ret.Get(0).(string) } - var r1 error if rf, ok := ret.Get(1).(func(string) error); ok { r1 = rf(auxDev) } else { @@ -236,13 +266,16 @@ func (_m *SriovnetOps) GetVfIndexByPciAddress(vfPciAddress string) (int, error) ret := _m.Called(vfPciAddress) var r0 int + var r1 error + if rf, ok := ret.Get(0).(func(string) (int, error)); ok { + return rf(vfPciAddress) + } if rf, ok := ret.Get(0).(func(string) int); ok { r0 = rf(vfPciAddress) } else { r0 = ret.Get(0).(int) } - var r1 error if rf, ok := ret.Get(1).(func(string) error); ok { r1 = rf(vfPciAddress) } else { @@ -257,13 +290,16 @@ func (_m *SriovnetOps) GetVfRepresentor(uplink string, vfIndex int) (string, err ret := _m.Called(uplink, vfIndex) var r0 string + var r1 error + if rf, ok := ret.Get(0).(func(string, int) (string, error)); ok { + return rf(uplink, vfIndex) + } if rf, ok := ret.Get(0).(func(string, int) string); ok { r0 = rf(uplink, vfIndex) } else { r0 = ret.Get(0).(string) } - var r1 error if rf, ok := ret.Get(1).(func(string, int) error); ok { r1 = rf(uplink, vfIndex) } else { @@ -278,13 +314,16 @@ func (_m *SriovnetOps) GetVfRepresentorDPU(pfID string, vfIndex string) (string, ret := _m.Called(pfID, vfIndex) var r0 string + var r1 error + if rf, ok := ret.Get(0).(func(string, string) (string, error)); ok { + return rf(pfID, vfIndex) + } if rf, ok := ret.Get(0).(func(string, string) string); ok { r0 = rf(pfID, vfIndex) } else { r0 = ret.Get(0).(string) } - var r1 error if rf, ok := ret.Get(1).(func(string, string) error); ok { r1 = rf(pfID, vfIndex) } else { @@ -294,13 +333,13 @@ func (_m *SriovnetOps) GetVfRepresentorDPU(pfID string, vfIndex string) (string, return r0, r1 } -type NewSriovnetOpsT interface { +type mockConstructorTestingTNewSriovnetOps interface { mock.TestingT Cleanup(func()) } // NewSriovnetOps creates a new instance of SriovnetOps. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. 
-func NewSriovnetOps(t NewSriovnetOpsT) *SriovnetOps { +func NewSriovnetOps(t mockConstructorTestingTNewSriovnetOps) *SriovnetOps { mock := &SriovnetOps{} mock.Mock.Test(t) From 25f054394cf91d62375fa300f61685b20ff94d66 Mon Sep 17 00:00:00 2001 From: Balazs Nemeth Date: Thu, 27 Apr 2023 13:11:28 +0200 Subject: [PATCH 13/90] Upgrade sriovnet Signed-off-by: Balazs Nemeth --- go-controller/go.mod | 12 +- go-controller/go.sum | 34 +++-- go-controller/pkg/cni/helper_linux_test.go | 2 +- .../pkg/node/gateway_init_linux_test.go | 2 +- go-controller/pkg/util/mocks/SriovnetOps.go | 2 +- go-controller/pkg/util/nicstobridge.go | 2 +- go-controller/pkg/util/sriovnet_linux.go | 2 +- .../github.com/Mellanox/sriovnet/.travis.yml | 16 --- .../github.com/Mellanox/sriovnet/Makefile | 80 ------------ .../vendor/github.com/google/uuid/null.go | 118 ++++++++++++++++++ .../vendor/github.com/google/uuid/uuid.go | 45 ++++++- .../vendor/github.com/google/uuid/version4.go | 27 +++- .../sriovnet/.golangci.yml | 30 +++-- .../sriovnet/LICENSE | 0 .../k8snetworkplumbingwg/sriovnet/Makefile | 63 ++++++++++ .../sriovnet/README.md | 11 +- .../sriovnet/file_access.go | 4 +- .../sriovnet/mofed_ib_helper.go | 0 .../pkg/utils/filesystem/defaultfs.go | 27 ++-- .../sriovnet/pkg/utils/filesystem/fakefs.go | 0 .../pkg/utils/filesystem/filesystem.go | 0 .../pkg/utils/netlinkops/netlinkops.go | 0 .../sriovnet/sriovnet.go | 59 ++++++++- .../sriovnet/sriovnet_aux.go | 24 +++- .../sriovnet/sriovnet_helper.go | 0 .../sriovnet/sriovnet_switchdev.go | 94 ++++++++++++-- .../sriovnet/utils.go | 2 +- .../vendor/github.com/spf13/afero/.travis.yml | 26 ---- .../vendor/github.com/spf13/afero/README.md | 18 ++- .../vendor/github.com/spf13/afero/afero.go | 6 +- .../github.com/spf13/afero/appveyor.yml | 9 +- .../vendor/github.com/spf13/afero/basepath.go | 13 +- .../github.com/spf13/afero/cacheOnReadFs.go | 6 +- .../github.com/spf13/afero/const_bsds.go | 1 + .../github.com/spf13/afero/const_win_unix.go | 8 +- .../github.com/spf13/afero/copyOnWriteFs.go | 9 +- .../vendor/github.com/spf13/afero/httpFs.go | 2 +- .../spf13/afero/internal/common/adapters.go | 27 ++++ .../vendor/github.com/spf13/afero/iofs.go | 38 +++--- .../vendor/github.com/spf13/afero/ioutil.go | 19 +-- .../vendor/github.com/spf13/afero/mem/file.go | 39 ++++-- .../vendor/github.com/spf13/afero/memmap.go | 28 ++++- .../vendor/github.com/spf13/afero/regexpfs.go | 1 - .../vendor/github.com/spf13/afero/symlink.go | 6 +- .../github.com/spf13/afero/unionFile.go | 37 ++++-- .../vendor/github.com/spf13/afero/util.go | 17 ++- .../vishvananda/netns/.golangci.yml | 2 + .../github.com/vishvananda/netns/README.md | 12 +- .../github.com/vishvananda/netns/doc.go | 9 ++ .../vishvananda/netns/netns_linux.go | 111 +++++++++------- .../{netns_unspecified.go => netns_others.go} | 17 +++ .../netns/{netns.go => nshandle_linux.go} | 12 +- .../vishvananda/netns/nshandle_others.go | 45 +++++++ .../x/text/encoding/internal/internal.go | 2 +- .../x/text/unicode/norm/forminfo.go | 2 +- go-controller/vendor/modules.txt | 25 ++-- 56 files changed, 846 insertions(+), 357 deletions(-) delete mode 100644 go-controller/vendor/github.com/Mellanox/sriovnet/.travis.yml delete mode 100644 go-controller/vendor/github.com/Mellanox/sriovnet/Makefile create mode 100644 go-controller/vendor/github.com/google/uuid/null.go rename go-controller/vendor/github.com/{Mellanox => k8snetworkplumbingwg}/sriovnet/.golangci.yml (71%) rename go-controller/vendor/github.com/{Mellanox => k8snetworkplumbingwg}/sriovnet/LICENSE (100%) create 
mode 100644 go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/Makefile rename go-controller/vendor/github.com/{Mellanox => k8snetworkplumbingwg}/sriovnet/README.md (54%) rename go-controller/vendor/github.com/{Mellanox => k8snetworkplumbingwg}/sriovnet/file_access.go (97%) rename go-controller/vendor/github.com/{Mellanox => k8snetworkplumbingwg}/sriovnet/mofed_ib_helper.go (100%) rename go-controller/vendor/github.com/{Mellanox => k8snetworkplumbingwg}/sriovnet/pkg/utils/filesystem/defaultfs.go (84%) rename go-controller/vendor/github.com/{Mellanox => k8snetworkplumbingwg}/sriovnet/pkg/utils/filesystem/fakefs.go (100%) rename go-controller/vendor/github.com/{Mellanox => k8snetworkplumbingwg}/sriovnet/pkg/utils/filesystem/filesystem.go (100%) rename go-controller/vendor/github.com/{Mellanox => k8snetworkplumbingwg}/sriovnet/pkg/utils/netlinkops/netlinkops.go (100%) rename go-controller/vendor/github.com/{Mellanox => k8snetworkplumbingwg}/sriovnet/sriovnet.go (83%) rename go-controller/vendor/github.com/{Mellanox => k8snetworkplumbingwg}/sriovnet/sriovnet_aux.go (80%) rename go-controller/vendor/github.com/{Mellanox => k8snetworkplumbingwg}/sriovnet/sriovnet_helper.go (100%) rename go-controller/vendor/github.com/{Mellanox => k8snetworkplumbingwg}/sriovnet/sriovnet_switchdev.go (82%) rename go-controller/vendor/github.com/{Mellanox => k8snetworkplumbingwg}/sriovnet/utils.go (94%) delete mode 100644 go-controller/vendor/github.com/spf13/afero/.travis.yml create mode 100644 go-controller/vendor/github.com/spf13/afero/internal/common/adapters.go create mode 100644 go-controller/vendor/github.com/vishvananda/netns/.golangci.yml create mode 100644 go-controller/vendor/github.com/vishvananda/netns/doc.go rename go-controller/vendor/github.com/vishvananda/netns/{netns_unspecified.go => netns_others.go} (63%) rename go-controller/vendor/github.com/vishvananda/netns/{netns.go => nshandle_linux.go} (75%) create mode 100644 go-controller/vendor/github.com/vishvananda/netns/nshandle_others.go diff --git a/go-controller/go.mod b/go-controller/go.mod index 5383800032..c0b9b215a5 100644 --- a/go-controller/go.mod +++ b/go-controller/go.mod @@ -3,7 +3,6 @@ module github.com/ovn-org/ovn-kubernetes/go-controller go 1.18 require ( - github.com/Mellanox/sriovnet v1.1.0 github.com/Microsoft/hcsshim v0.9.6 github.com/alexflint/go-filemutex v1.2.0 github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d @@ -12,12 +11,13 @@ require ( github.com/containernetworking/cni v1.1.2 github.com/containernetworking/plugins v1.2.0 github.com/coreos/go-iptables v0.6.0 - github.com/google/uuid v1.2.0 + github.com/google/uuid v1.3.0 github.com/gorilla/mux v1.8.0 github.com/j-keck/arping v1.0.2 github.com/k8snetworkplumbingwg/govdpa v0.1.4 github.com/k8snetworkplumbingwg/multi-networkpolicy v0.0.0-20200914073308-0f33b9190170 github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.4.0 + github.com/k8snetworkplumbingwg/sriovnet v1.2.1-0.20230427090635-4929697df2dc github.com/miekg/dns v1.1.31 github.com/mitchellh/copystructure v1.2.0 github.com/onsi/ginkgo v1.16.5 @@ -29,10 +29,10 @@ require ( github.com/prometheus/client_golang v1.14.0 github.com/prometheus/client_model v0.3.0 github.com/safchain/ethtool v0.3.0 - github.com/spf13/afero v1.6.0 + github.com/spf13/afero v1.9.5 github.com/stretchr/testify v1.8.0 github.com/urfave/cli/v2 v2.2.0 - github.com/vishvananda/netlink v1.2.1-beta.2.0.20230206183746-70ca0345eede + github.com/vishvananda/netlink v1.2.1-beta.2.0.20230420174744-55c8b9515a01 
golang.org/x/net v0.7.0 golang.org/x/sync v0.1.0 golang.org/x/sys v0.7.0 @@ -92,12 +92,12 @@ require ( github.com/sirupsen/logrus v1.9.0 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/stretchr/objx v0.4.0 // indirect - github.com/vishvananda/netns v0.0.0-20210104183010-2eb08e3e575f // indirect + github.com/vishvananda/netns v0.0.4 // indirect go.opencensus.io v0.23.0 // indirect golang.org/x/crypto v0.1.0 // indirect golang.org/x/oauth2 v0.0.0-20220411215720-9780585627b5 // indirect golang.org/x/term v0.5.0 // indirect - golang.org/x/text v0.7.0 // indirect + golang.org/x/text v0.9.0 // indirect google.golang.org/appengine v1.6.7 // indirect google.golang.org/genproto v0.0.0-20220502173005-c8bf987b8c21 // indirect gopkg.in/inf.v0 v0.9.1 // indirect diff --git a/go-controller/go.sum b/go-controller/go.sum index 2e68223deb..a0bcc1057b 100644 --- a/go-controller/go.sum +++ b/go-controller/go.sum @@ -4,6 +4,7 @@ cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMT cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU= cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY= +cloud.google.com/go v0.44.3/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY= cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc= cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0= cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To= @@ -16,6 +17,7 @@ cloud.google.com/go v0.62.0/go.mod h1:jmCYTdRCQuc1PHIIJ/maLInMho30T/Y0M4hTdTShOY cloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHObY= cloud.google.com/go v0.72.0/go.mod h1:M+5Vjvlc2wnp6tjzE102Dw08nGShTscUx2nZMufOKPI= cloud.google.com/go v0.74.0/go.mod h1:VV1xSbzvo+9QJOxLDaJfTjx5e+MePCpCWwvftOeQmWk= +cloud.google.com/go v0.75.0/go.mod h1:VGuuCn7PG0dwsd5XPVm2Mm3wlh3EL55/79EKB6hlPTY= cloud.google.com/go v0.78.0/go.mod h1:QjdrLG0uq+YwhjoVOLsS1t7TW8fs36kLs4XO5R5ECHg= cloud.google.com/go v0.79.0/go.mod h1:3bzgcEeQlzbuEAYu4mrWhKqWjmpprinYgKJLgKHnbb8= cloud.google.com/go v0.81.0/go.mod h1:mk/AM35KwGk/Nm2YSeZbxXdrNK3KZOYHmLkOqC2V6E0= @@ -44,6 +46,7 @@ cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0Zeo cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk= cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs= cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0= +cloud.google.com/go/storage v1.14.0/go.mod h1:GrKmX003DSIwi9o29oFT7YDnHYwZoctc3fOKtUw0Xmo= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= github.com/Azure/azure-sdk-for-go v16.2.1+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc= github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= @@ -51,8 +54,6 @@ github.com/Azure/go-autorest v10.8.1+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSW github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= -github.com/Mellanox/sriovnet v1.1.0 
h1:j3KnktNJMHWPTqWXlf27OzQG0ahRO+88NauMjlazyko= -github.com/Mellanox/sriovnet v1.1.0/go.mod h1:P2Epf+52ZaPknkR60EUOvLABXZh3FBymcHPsUfikRVE= github.com/Microsoft/go-winio v0.4.11/go.mod h1:VhR8bwka0BXejwEJY73c50VrPtXAaKcyvVC4A4RozmA= github.com/Microsoft/go-winio v0.4.14/go.mod h1:qXqCSQ3Xa7+6tgxaGTIe4Kpcdsi+P8jBhyzoq1bpyYA= github.com/Microsoft/go-winio v0.4.16-0.20201130162521-d1ffc52c7331/go.mod h1:XB6nPKklQyQ7GC9LdcBEcBl8PF76WugXOPRXwdLnMv0= @@ -427,6 +428,7 @@ github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hf github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20201023163331-3e6fc7fc9c4c/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20201203190320-1bf35d6f28c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/pprof v0.0.0-20201218002935-b9804c9f04c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210122040257-d980be63207e/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210226084205-cbba55b83ad5/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= @@ -436,13 +438,15 @@ github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLe github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.2.0 h1:qJYtXnJRWmpe7m/3XlyhrsLrEURqHRM2kxzoxXqyUDs= github.com/google/uuid v1.2.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= +github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/googleapis/gax-go/v2 v2.1.0/go.mod h1:Q3nei7sK6ybPYH7twZdmQpAd1MKb7pfu6SK+H1/DsU0= github.com/googleapis/gnostic v0.0.0-20170729233727-0c5108395e2d/go.mod h1:sJBsCZ4ayReDTBIg8b9dl28c5xFWyhBTVRp3pOg5EKY= github.com/googleapis/gnostic v0.2.0/go.mod h1:sJBsCZ4ayReDTBIg8b9dl28c5xFWyhBTVRp3pOg5EKY= +github.com/googleapis/google-cloud-go-testing v0.0.0-20200911160855-bcd43fbb19e8/go.mod h1:dvDLG8qkwmyD9a/MJJN3XJcT3xFxOKAvTZGvuZmac9g= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= github.com/gorilla/handlers v0.0.0-20150720190736-60c7bfde3e33/go.mod h1:Qkdc/uu4tH4g6mTK6auzZ766c4CA0Ng8+o/OAirnOIQ= github.com/gorilla/mux v1.7.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= @@ -530,6 +534,8 @@ github.com/k8snetworkplumbingwg/multi-networkpolicy v0.0.0-20200914073308-0f33b9 github.com/k8snetworkplumbingwg/multi-networkpolicy v0.0.0-20200914073308-0f33b9190170/go.mod h1:CF9uYILB8GY25A/6Hhi1AWKc29qbyLu8r7Gs+uINGZE= github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.4.0 h1:VzM3TYHDgqPkettiP6I6q2jOeQFL4nrJM+UcAc4f6Fs= github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.4.0/go.mod h1:nqCI7aelBJU61wiBeeZWJ6oi4bJy5nrjkM6lWIMA4j0= +github.com/k8snetworkplumbingwg/sriovnet 
v1.2.1-0.20230427090635-4929697df2dc h1:v6+jUd70AayPbIRgTYUNpnBLG5cBPTY0+10y80CZeMk= +github.com/k8snetworkplumbingwg/sriovnet v1.2.1-0.20230427090635-4929697df2dc/go.mod h1:jyWzGe6ZtYiPq6ih6aXCOy6mZ49Y9mNyBOLBBXnli+k= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.11.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= @@ -668,7 +674,7 @@ github.com/pkg/errors v0.8.1-0.20171018195549-f15c970de5b7/go.mod h1:bwawxfHBFNV github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/sftp v1.10.1/go.mod h1:lYOWFsE0bwd1+KfKJaKeuokY15vzFx25BLbzYYoAxZI= +github.com/pkg/sftp v1.13.1/go.mod h1:3HaPG6Dq1ILlpPZRO0HVMrsydcdLt6HRDccSgb87qRg= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= @@ -746,9 +752,8 @@ github.com/soheilhy/cmux v0.1.5/go.mod h1:T7TcVDs9LWfQgPlPsdngu6I6QIoyIFZDDC6sNE github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= -github.com/spf13/afero v1.4.1/go.mod h1:Ai8FlHk4v/PARR026UzYexafAt9roJ7LcLMAmO6Z93I= -github.com/spf13/afero v1.6.0 h1:xoax2sJ2DT8S8xA2paPFjDCScCNeWsg75VG0DLRreiY= -github.com/spf13/afero v1.6.0/go.mod h1:Ai8FlHk4v/PARR026UzYexafAt9roJ7LcLMAmO6Z93I= +github.com/spf13/afero v1.9.5 h1:stMpOSZFs//0Lv29HduCmli3GUfpFoF3Y1Q/aXj/wVM= +github.com/spf13/afero v1.9.5/go.mod h1:UBogFpq8E9Hx+xc5CNTTEpTnuHVmXDwZcZcE1eb/UhQ= github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= github.com/spf13/cobra v0.0.2-0.20171109065643-2da4a54c5cee/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= @@ -794,8 +799,8 @@ github.com/urfave/cli/v2 v2.2.0 h1:JTTnM6wKzdA0Jqodd966MVj4vWbbquZykeX1sKbe2C4= github.com/urfave/cli/v2 v2.2.0/go.mod h1:SE9GqnLQmjVa0iPEY0f1w3ygNIYcIJ0OKPMoW2caLfQ= github.com/vishvananda/netns v0.0.0-20180720170159-13995c7128cc/go.mod h1:ZjcWmFBXmLKZu9Nxj3WKYEafiSqer2rnvPr0en9UNpI= github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0= -github.com/vishvananda/netns v0.0.0-20210104183010-2eb08e3e575f h1:p4VB7kIXpOQvVn1ZaTIVp+3vuYAXFe3OJEvjbUYJLaA= -github.com/vishvananda/netns v0.0.0-20210104183010-2eb08e3e575f/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0= +github.com/vishvananda/netns v0.0.4 h1:Oeaw1EM2JMxD51g9uhtC0D7erkIjgmj8+JZc26m1YX8= +github.com/vishvananda/netns v0.0.4/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM= github.com/willf/bitset v1.1.11-0.20200630133818-d5bec3311243/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4= github.com/willf/bitset v1.1.11/go.mod h1:83CECat5yLh5zVOf4P1ErAgKA5UDvKtgyUABdr3+MjI= github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= @@ -872,13 +877,14 @@ 
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200728195943-123391ffb6de/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210322153248-0c34fe9e7dc2/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= +golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.1.0 h1:MDRAIl0xIo9Io2xV565hzXHw3zVseKrJKodhohM5CjU= golang.org/x/crypto v0.1.0/go.mod h1:RecgLatLF4+eUMCP1PoPZQb+cVrJcOPbHkTkbkB9sbw= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= @@ -1087,6 +1093,7 @@ golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210220050731-9a76102bfb43/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210225134936-a50acf3fe073/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210305230114-8fe3ee5dd75b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210315160823-c6e025ad8005/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -1137,8 +1144,8 @@ golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.7.0 h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo= -golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= +golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -1201,6 +1208,7 @@ golang.org/x/tools 
v0.0.0-20201208233053-a543418bbed2/go.mod h1:emZCQorbCU4vsT4f golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20210105154028-b0ab187a4818/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.0.0-20210108195828-e2f9c7f1fc8e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= @@ -1291,7 +1299,9 @@ google.golang.org/genproto v0.0.0-20201109203340-2640f1f9cdfb/go.mod h1:FWY/as6D google.golang.org/genproto v0.0.0-20201201144952-b05cb90ed32e/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20201210142538-e3217bee35cc/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20201214200347-8c77b98c765d/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20210108203827-ffc7fda8c3d7/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20210222152913-aa3ee6e6a81c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20210226172003-ab064af71705/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20210303154014-9728d6b83eeb/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20210310155132-4ce2db91004e/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20210319143718-93e7006c17a6/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= diff --git a/go-controller/pkg/cni/helper_linux_test.go b/go-controller/pkg/cni/helper_linux_test.go index c8ef24c7dc..7e7d29dffc 100644 --- a/go-controller/pkg/cni/helper_linux_test.go +++ b/go-controller/pkg/cni/helper_linux_test.go @@ -6,10 +6,10 @@ import ( "net" "testing" - "github.com/Mellanox/sriovnet" cnitypes "github.com/containernetworking/cni/pkg/types" current "github.com/containernetworking/cni/pkg/types/100" "github.com/containernetworking/plugins/pkg/ns" + "github.com/k8snetworkplumbingwg/sriovnet" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni/mocks" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni/types" ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" diff --git a/go-controller/pkg/node/gateway_init_linux_test.go b/go-controller/pkg/node/gateway_init_linux_test.go index 796211ccd1..b4129771b0 100644 --- a/go-controller/pkg/node/gateway_init_linux_test.go +++ b/go-controller/pkg/node/gateway_init_linux_test.go @@ -12,7 +12,7 @@ import ( "sync" "syscall" - "github.com/Mellanox/sriovnet" + "github.com/k8snetworkplumbingwg/sriovnet" "github.com/stretchr/testify/mock" "github.com/urfave/cli/v2" v1 "k8s.io/api/core/v1" diff --git a/go-controller/pkg/util/mocks/SriovnetOps.go b/go-controller/pkg/util/mocks/SriovnetOps.go index ddf04c8bca..a4635270f0 100644 --- a/go-controller/pkg/util/mocks/SriovnetOps.go +++ b/go-controller/pkg/util/mocks/SriovnetOps.go @@ -7,7 +7,7 @@ import ( mock "github.com/stretchr/testify/mock" - sriovnet "github.com/Mellanox/sriovnet" + sriovnet "github.com/k8snetworkplumbingwg/sriovnet" ) // SriovnetOps is an autogenerated mock type for the SriovnetOps 
type diff --git a/go-controller/pkg/util/nicstobridge.go b/go-controller/pkg/util/nicstobridge.go index 955c06fcde..0a40824c96 100644 --- a/go-controller/pkg/util/nicstobridge.go +++ b/go-controller/pkg/util/nicstobridge.go @@ -10,7 +10,7 @@ import ( "strings" "syscall" - "github.com/Mellanox/sriovnet" + "github.com/k8snetworkplumbingwg/sriovnet" "github.com/vishvananda/netlink" "k8s.io/klog/v2" ) diff --git a/go-controller/pkg/util/sriovnet_linux.go b/go-controller/pkg/util/sriovnet_linux.go index 42bda3a6e1..3300eecb8b 100644 --- a/go-controller/pkg/util/sriovnet_linux.go +++ b/go-controller/pkg/util/sriovnet_linux.go @@ -7,8 +7,8 @@ import ( "fmt" "net" - "github.com/Mellanox/sriovnet" "github.com/k8snetworkplumbingwg/govdpa/pkg/kvdpa" + "github.com/k8snetworkplumbingwg/sriovnet" ) type SriovnetOps interface { diff --git a/go-controller/vendor/github.com/Mellanox/sriovnet/.travis.yml b/go-controller/vendor/github.com/Mellanox/sriovnet/.travis.yml deleted file mode 100644 index 64e2722015..0000000000 --- a/go-controller/vendor/github.com/Mellanox/sriovnet/.travis.yml +++ /dev/null @@ -1,16 +0,0 @@ -# see https://docs.travis-ci.com/user/reference/overview/#Virtualization-environments -language: go - -# use the latest ubuntu environment (18.04) available on travis -dist: xenial - -go: - - 1.13.x - -before_script: - - go get -u github.com/mattn/goveralls - -script: - - make lint - - make test-coverage - - goveralls -coverprofile=sriovnet.cover -service=travis-ci diff --git a/go-controller/vendor/github.com/Mellanox/sriovnet/Makefile b/go-controller/vendor/github.com/Mellanox/sriovnet/Makefile deleted file mode 100644 index d4cb6fa42a..0000000000 --- a/go-controller/vendor/github.com/Mellanox/sriovnet/Makefile +++ /dev/null @@ -1,80 +0,0 @@ -# Package related -PACKAGE=sriovnet -ORG_PATH=github.com/Mellanox -REPO_PATH=$(ORG_PATH)/$(PACKAGE) -GOPATH=$(CURDIR)/.gopath -GOBIN =$(CURDIR)/bin -BASE=$(GOPATH)/src/$(REPO_PATH) -GOFILES=$(shell find . -name "*.go" | grep -vE "(\/vendor\/)|(_test.go)") -PKGS=$(or $(PKG),$(shell cd $(BASE) && env GOPATH=$(GOPATH) $(GO) list ./... | grep -v "^$(PACKAGE)/vendor/")) -TESTPKGS = $(shell env GOPATH=$(GOPATH) $(GO) list -f '{{ if or .TestGoFiles .XTestGoFiles }}{{ .ImportPath }}{{ end }}' $(PKGS)) - -export GOPATH - -# Go tools -GO = go -GOLANGCI_LINT = $(GOBIN)/golangci-lint -# golangci-lint version should be updated periodically -# we keep it fixed to avoid it from unexpectedly failing on the project -# in case of a version bump -GOLANGCI_LINT_VER = v1.39.0 -TIMEOUT = 15 -Q = $(if $(filter 1,$V),,@) - -.PHONY: all -all: lint test build - -$(GOBIN): - @mkdir -p $@ - -$(BASE): ; $(info setting GOPATH...) - @mkdir -p $(dir $@) - @ln -sf $(CURDIR) $@ - -build: $(GOFILES) - @CGO_ENABLED=0 $(GO) build -v - -# Tools - -$(GOLANGCI_LINT): ; $(info building golangci-lint...) - $Q curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(GOBIN) $(GOLANGCI_LINT_VER) - -GOVERALLS = $(GOBIN)/goveralls -$(GOBIN)/goveralls: $(BASE) $(GOBIN) ; $(info building goveralls...) - $Q go get github.com/mattn/goveralls - -# Tests - -.PHONY: lint -lint: $(BASE) $(GOLANGCI_LINT) ; $(info running golangci-lint...) @ ## Run golangci-lint - $Q mkdir -p $(BASE)/test - $Q cd $(BASE) && ret=0 && \ - test -z "$$($(GOLANGCI_LINT) run | tee $(BASE)/test/lint.out)" || ret=1 ; \ - cat $(BASE)/test/lint.out ; rm -rf $(BASE)/test ; \ - exit $$ret - - -.PHONY: test tests -test: $(BASE) ; $(info running unit tests...) 
@ ## Run unit tests - $Q cd $(BASE) && $(GO) test -timeout $(TIMEOUT)s $(ARGS) ./... - -tests: test lint ; - -COVERAGE_MODE = count -.PHONY: test-coverage test-coverage-tools -test-coverage-tools: $(GOVERALLS) -test-coverage: COVERAGE_DIR := $(CURDIR)/test -test-coverage: test-coverage-tools $(BASE) ; $(info running coverage tests...) @ ## Run coverage tests - $Q cd $(BASE); $(GO) test -covermode=$(COVERAGE_MODE) -coverprofile=sriovnet.cover ./... - -# Misc - -.PHONY: clean -clean: ; $(info Cleaning...) @ ## Cleanup everything - @rm -rf $(GOPATH) - @rm -rf test - -.PHONY: help -help: ## Show this message - @grep -E '^[ a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \ - awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}' diff --git a/go-controller/vendor/github.com/google/uuid/null.go b/go-controller/vendor/github.com/google/uuid/null.go new file mode 100644 index 0000000000..d7fcbf2865 --- /dev/null +++ b/go-controller/vendor/github.com/google/uuid/null.go @@ -0,0 +1,118 @@ +// Copyright 2021 Google Inc. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package uuid + +import ( + "bytes" + "database/sql/driver" + "encoding/json" + "fmt" +) + +var jsonNull = []byte("null") + +// NullUUID represents a UUID that may be null. +// NullUUID implements the SQL driver.Scanner interface so +// it can be used as a scan destination: +// +// var u uuid.NullUUID +// err := db.QueryRow("SELECT name FROM foo WHERE id=?", id).Scan(&u) +// ... +// if u.Valid { +// // use u.UUID +// } else { +// // NULL value +// } +// +type NullUUID struct { + UUID UUID + Valid bool // Valid is true if UUID is not NULL +} + +// Scan implements the SQL driver.Scanner interface. +func (nu *NullUUID) Scan(value interface{}) error { + if value == nil { + nu.UUID, nu.Valid = Nil, false + return nil + } + + err := nu.UUID.Scan(value) + if err != nil { + nu.Valid = false + return err + } + + nu.Valid = true + return nil +} + +// Value implements the driver Valuer interface. +func (nu NullUUID) Value() (driver.Value, error) { + if !nu.Valid { + return nil, nil + } + // Delegate to UUID Value function + return nu.UUID.Value() +} + +// MarshalBinary implements encoding.BinaryMarshaler. +func (nu NullUUID) MarshalBinary() ([]byte, error) { + if nu.Valid { + return nu.UUID[:], nil + } + + return []byte(nil), nil +} + +// UnmarshalBinary implements encoding.BinaryUnmarshaler. +func (nu *NullUUID) UnmarshalBinary(data []byte) error { + if len(data) != 16 { + return fmt.Errorf("invalid UUID (got %d bytes)", len(data)) + } + copy(nu.UUID[:], data) + nu.Valid = true + return nil +} + +// MarshalText implements encoding.TextMarshaler. +func (nu NullUUID) MarshalText() ([]byte, error) { + if nu.Valid { + return nu.UUID.MarshalText() + } + + return jsonNull, nil +} + +// UnmarshalText implements encoding.TextUnmarshaler. +func (nu *NullUUID) UnmarshalText(data []byte) error { + id, err := ParseBytes(data) + if err != nil { + nu.Valid = false + return err + } + nu.UUID = id + nu.Valid = true + return nil +} + +// MarshalJSON implements json.Marshaler. +func (nu NullUUID) MarshalJSON() ([]byte, error) { + if nu.Valid { + return json.Marshal(nu.UUID) + } + + return jsonNull, nil +} + +// UnmarshalJSON implements json.Unmarshaler. 
+func (nu *NullUUID) UnmarshalJSON(data []byte) error { + if bytes.Equal(data, jsonNull) { + *nu = NullUUID{} + return nil // valid null UUID + } + err := json.Unmarshal(data, &nu.UUID) + nu.Valid = err == nil + return err +} diff --git a/go-controller/vendor/github.com/google/uuid/uuid.go b/go-controller/vendor/github.com/google/uuid/uuid.go index 60d26bb50c..a57207aeb6 100644 --- a/go-controller/vendor/github.com/google/uuid/uuid.go +++ b/go-controller/vendor/github.com/google/uuid/uuid.go @@ -12,6 +12,7 @@ import ( "fmt" "io" "strings" + "sync" ) // A UUID is a 128 bit (16 byte) Universal Unique IDentifier as defined in RFC @@ -33,7 +34,15 @@ const ( Future // Reserved for future definition. ) -var rander = rand.Reader // random function +const randPoolSize = 16 * 16 + +var ( + rander = rand.Reader // random function + poolEnabled = false + poolMu sync.Mutex + poolPos = randPoolSize // protected with poolMu + pool [randPoolSize]byte // protected with poolMu +) type invalidLengthError struct{ len int } @@ -41,6 +50,12 @@ func (err invalidLengthError) Error() string { return fmt.Sprintf("invalid UUID length: %d", err.len) } +// IsInvalidLengthError is matcher function for custom error invalidLengthError +func IsInvalidLengthError(err error) bool { + _, ok := err.(invalidLengthError) + return ok +} + // Parse decodes s into a UUID or returns an error. Both the standard UUID // forms of xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx and // urn:uuid:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx are decoded as well as the @@ -249,3 +264,31 @@ func SetRand(r io.Reader) { } rander = r } + +// EnableRandPool enables internal randomness pool used for Random +// (Version 4) UUID generation. The pool contains random bytes read from +// the random number generator on demand in batches. Enabling the pool +// may improve the UUID generation throughput significantly. +// +// Since the pool is stored on the Go heap, this feature may be a bad fit +// for security sensitive applications. +// +// Both EnableRandPool and DisableRandPool are not thread-safe and should +// only be called when there is no possibility that New or any other +// UUID Version 4 generation function will be called concurrently. +func EnableRandPool() { + poolEnabled = true +} + +// DisableRandPool disables the randomness pool if it was previously +// enabled with EnableRandPool. +// +// Both EnableRandPool and DisableRandPool are not thread-safe and should +// only be called when there is no possibility that New or any other +// UUID Version 4 generation function will be called concurrently. +func DisableRandPool() { + poolEnabled = false + defer poolMu.Unlock() + poolMu.Lock() + poolPos = randPoolSize +} diff --git a/go-controller/vendor/github.com/google/uuid/version4.go b/go-controller/vendor/github.com/google/uuid/version4.go index 86160fbd07..7697802e4d 100644 --- a/go-controller/vendor/github.com/google/uuid/version4.go +++ b/go-controller/vendor/github.com/google/uuid/version4.go @@ -27,6 +27,8 @@ func NewString() string { // The strength of the UUIDs is based on the strength of the crypto/rand // package. // +// Uses the randomness pool if it was enabled with EnableRandPool. +// // A note about uniqueness derived from the UUID Wikipedia entry: // // Randomly generated UUIDs have 122 random bits. One's annual risk of being @@ -35,7 +37,10 @@ func NewString() string { // equivalent to the odds of creating a few tens of trillions of UUIDs in a // year and having one duplicate. 
func NewRandom() (UUID, error) { - return NewRandomFromReader(rander) + if !poolEnabled { + return NewRandomFromReader(rander) + } + return newRandomFromPool() } // NewRandomFromReader returns a UUID based on bytes read from a given io.Reader. @@ -49,3 +54,23 @@ func NewRandomFromReader(r io.Reader) (UUID, error) { uuid[8] = (uuid[8] & 0x3f) | 0x80 // Variant is 10 return uuid, nil } + +func newRandomFromPool() (UUID, error) { + var uuid UUID + poolMu.Lock() + if poolPos == randPoolSize { + _, err := io.ReadFull(rander, pool[:]) + if err != nil { + poolMu.Unlock() + return Nil, err + } + poolPos = 0 + } + copy(uuid[:], pool[poolPos:(poolPos+16)]) + poolPos += 16 + poolMu.Unlock() + + uuid[6] = (uuid[6] & 0x0f) | 0x40 // Version 4 + uuid[8] = (uuid[8] & 0x3f) | 0x80 // Variant is 10 + return uuid, nil +} diff --git a/go-controller/vendor/github.com/Mellanox/sriovnet/.golangci.yml b/go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/.golangci.yml similarity index 71% rename from go-controller/vendor/github.com/Mellanox/sriovnet/.golangci.yml rename to go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/.golangci.yml index 13903829ce..64dbb3614d 100644 --- a/go-controller/vendor/github.com/Mellanox/sriovnet/.golangci.yml +++ b/go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/.golangci.yml @@ -1,3 +1,20 @@ +run: + timeout: 10m + + # If set we pass it to "go list -mod={option}". From "go help modules": + # If invoked with -mod=readonly, the go command is disallowed from the implicit + # automatic updating of go.mod described above. Instead, it fails when any changes + # to go.mod are needed. This setting is most useful to check that go.mod does + # not need updates, such as in a continuous integration and testing system. + # If invoked with -mod=vendor, the go command assumes that the vendor + # directory holds the correct copies of dependencies and ignores + # the dependency descriptions in go.mod. + # + # Allowed values: readonly|vendor|mod + # By default, it isn't set. 
+ modules-download-mode: readonly + tests: false + linters-settings: dupl: threshold: 150 @@ -24,7 +41,7 @@ linters-settings: gocognit: min-complexity: 30 goimports: - local-prefixes: github.com/Mellanox/sriovnet + local-prefixes: github.com/k8snetworkplumbingwg/sriovnet golint: min-confidence: 0 gomnd: @@ -32,7 +49,7 @@ linters-settings: mnd: # don't include the "operation" and "assign" checks: argument,case,condition,return - ignored-numbers: 2 + ignored-numbers: "1,2,10,32" govet: check-shadowing: true settings: @@ -59,7 +76,6 @@ linters: disable-all: true enable: - bodyclose - - deadcode - depguard - dogsled - dupl @@ -71,7 +87,6 @@ linters: - gocognit - gofmt - goimports - - golint - gomnd - goprintffuncname - gosec @@ -82,25 +97,20 @@ linters: - misspell - nakedret - prealloc + - revive - rowserrcheck - exportloopref - staticcheck - - structcheck - stylecheck - typecheck - unconvert - unparam - unused - - varcheck - whitespace issues: # Excluding configuration per-path, per-linter, per-text and per-source exclude-rules: - - path: _test\.go - linters: - - gomnd - - goconst - text: "Magic number: 1" linters: - gomnd diff --git a/go-controller/vendor/github.com/Mellanox/sriovnet/LICENSE b/go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/LICENSE similarity index 100% rename from go-controller/vendor/github.com/Mellanox/sriovnet/LICENSE rename to go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/LICENSE diff --git a/go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/Makefile b/go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/Makefile new file mode 100644 index 0000000000..180a8a809d --- /dev/null +++ b/go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/Makefile @@ -0,0 +1,63 @@ +# Package related +PACKAGE := sriovnet +BIN_DIR := $(CURDIR)/bin +GOFILES := $(shell find . -name "*.go" | grep -vE "(\/vendor\/)|(_test.go)") +PKGS := $(or $(PKG),$(shell go list ./... | grep -v "^$(PACKAGE)/vendor/")) +TESTPKGS := $(shell go list -f '{{ if or .TestGoFiles .XTestGoFiles }}{{ .ImportPath }}{{ end }}' $(PKGS)) + +# Go tools +GOLANGCI_LINT := $(BIN_DIR)/golangci-lint +GCOV2LCOV := $(BIN_DIR)/gcov2lcov +# golangci-lint version should be updated periodically +# we keep it fixed to avoid it from unexpectedly failing on the project +# in case of a version bump +GOLANGCI_LINT_VER := v1.49.0 + +Q = $(if $(filter 1,$V),,@) + +.PHONY: all +all: lint test build + +$(BIN_DIR): + @mkdir -p $@ + +build: $(GOFILES) ;@ ## build sriovnet + @CGO_ENABLED=0 go build -v + +# Tests + +.PHONY: lint +lint: | $(GOLANGCI_LINT) ; $(info running golangci-lint...) @ ## Run lint tests + $Q $(GOLANGCI_LINT) run + +.PHONY: test tests +test: ; $(info running unit tests...) ## Run unit tests + $Q go test ./... + +tests: test lint ; ## Run all tests + +COVERAGE_MODE = count +.PHONY: test-coverage test-coverage-tools +test-coverage-tools: $(GCOV2LCOV) +test-coverage: | test-coverage-tools; $(info running coverage tests...) @ ## Run coverage tests + $Q go test -covermode=$(COVERAGE_MODE) -coverprofile=sriovnet.cover ./... + $Q $(GCOV2LCOV) -infile sriovnet.cover -outfile sriovnet.info + +# Tools +$(GOLANGCI_LINT): | $(BIN_DIR) ; $(info building golangci-lint...) + $Q GOBIN=$(BIN_DIR) go install github.com/golangci/golangci-lint/cmd/golangci-lint@$(GOLANGCI_LINT_VER) + +$(GCOV2LCOV): | $(BIN_DIR) ; $(info building gocov2lcov...) + $Q GOBIN=$(BIN_DIR) go install github.com/jandelgado/gcov2lcov@v1.0.5 + +# Misc +.PHONY: clean +clean: ; $(info Cleaning...) 
@ ## Cleanup everything + @rm -rf $(BIN_DIR) + @rm sriovnet.cover + @rm sriovnet.info + +.PHONY: help +help: ; @ ## Show this message + @grep -E '^[ a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \ + awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}' diff --git a/go-controller/vendor/github.com/Mellanox/sriovnet/README.md b/go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/README.md similarity index 54% rename from go-controller/vendor/github.com/Mellanox/sriovnet/README.md rename to go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/README.md index 138dfae169..2679318aa0 100644 --- a/go-controller/vendor/github.com/Mellanox/sriovnet/README.md +++ b/go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/README.md @@ -1,7 +1,8 @@ [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](http://www.apache.org/licenses/LICENSE-2.0) -[![Go Report Card](https://goreportcard.com/badge/github.com/Mellanox/sriovnet)](https://goreportcard.com/report/github.com/Mellanox/sriovnet) -[![Build Status](https://travis-ci.com/Mellanox/sriovnet.svg?branch=master)](https://travis-ci.com/Mellanox/sriovnet) -[![Coverage Status](https://coveralls.io/repos/github/Mellanox/sriovnet/badge.svg)](https://coveralls.io/github/Mellanox/sriovnet) +[![Go Report Card](https://goreportcard.com/badge/github.com/k8snetworkplumbingwg/sriovnet)](https://goreportcard.com/report/github.com/k8snetworkplumbingwg/sriovnet) +[![Build](https://github.com/k8snetworkplumbingwg/sriovnet/actions/workflows/build.yaml/badge.svg)](https://github.com/k8snetworkplumbingwg/sriovnet/actions/workflows/build.yaml) +[![Test](https://github.com/k8snetworkplumbingwg/sriovnet/actions/workflows/test.yaml/badge.svg)](https://github.com/k8snetworkplumbingwg/sriovnet/actions/workflows/test.yaml) +[![Coverage Status](https://coveralls.io/repos/github/k8snetworkplumbingwg/sriovnet/badge.svg)](https://coveralls.io/k8snetworkplumbingwg/sriovnet) # sriovnet Go library to configure SRIOV networking devices @@ -10,7 +11,7 @@ Local build and test You can use go get command: ``` -go get github.com/Mellanox/sriovnet +go get github.com/k8snetworkplumbingwg/sriovnet ``` Example: @@ -21,7 +22,7 @@ package main import ( "fmt" - "github.com/Mellanox/sriovnet" + "github.com/k8snetworkplumbingwg/sriovnet" ) func main() { diff --git a/go-controller/vendor/github.com/Mellanox/sriovnet/file_access.go b/go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/file_access.go similarity index 97% rename from go-controller/vendor/github.com/Mellanox/sriovnet/file_access.go rename to go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/file_access.go index a22b4911ef..b0fe653b3b 100644 --- a/go-controller/vendor/github.com/Mellanox/sriovnet/file_access.go +++ b/go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/file_access.go @@ -1,8 +1,8 @@ +//nolint:gomnd package sriovnet import ( "io" - "io/ioutil" "os" "strconv" "strings" @@ -56,7 +56,7 @@ func (attrib *fileObject) Read() (str string, err error) { if err != nil { return "", err } - data, err := ioutil.ReadAll(attrib.File) + data, err := io.ReadAll(attrib.File) if err != nil { return "", err } diff --git a/go-controller/vendor/github.com/Mellanox/sriovnet/mofed_ib_helper.go b/go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/mofed_ib_helper.go similarity index 100% rename from go-controller/vendor/github.com/Mellanox/sriovnet/mofed_ib_helper.go rename to 
go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/mofed_ib_helper.go diff --git a/go-controller/vendor/github.com/Mellanox/sriovnet/pkg/utils/filesystem/defaultfs.go b/go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/pkg/utils/filesystem/defaultfs.go similarity index 84% rename from go-controller/vendor/github.com/Mellanox/sriovnet/pkg/utils/filesystem/defaultfs.go rename to go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/pkg/utils/filesystem/defaultfs.go index c795caf51b..f092e86df3 100644 --- a/go-controller/vendor/github.com/Mellanox/sriovnet/pkg/utils/filesystem/defaultfs.go +++ b/go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/pkg/utils/filesystem/defaultfs.go @@ -1,7 +1,7 @@ package filesystem import ( - "io/ioutil" + "io/fs" "os" "path/filepath" "time" @@ -61,26 +61,39 @@ func (DefaultFs) Symlink(oldname, newname string) error { // ReadFile via ioutil.ReadFile func (DefaultFs) ReadFile(filename string) ([]byte, error) { - return ioutil.ReadFile(filename) + return os.ReadFile(filename) } // TempDir via ioutil.TempDir func (DefaultFs) TempDir(dir, prefix string) (string, error) { - return ioutil.TempDir(dir, prefix) + return os.MkdirTemp(dir, prefix) } // TempFile via ioutil.TempFile func (DefaultFs) TempFile(dir, prefix string) (File, error) { - file, err := ioutil.TempFile(dir, prefix) + file, err := os.CreateTemp(dir, prefix) if err != nil { return nil, err } return &defaultFile{file}, nil } -// ReadDir via ioutil.ReadDir +// ReadDir via os.ReadDir func (DefaultFs) ReadDir(dirname string) ([]os.FileInfo, error) { - return ioutil.ReadDir(dirname) + entries, err := os.ReadDir(dirname) + if err != nil { + return nil, err + } + + infos := make([]fs.FileInfo, 0, len(entries)) + for _, entry := range entries { + info, err := entry.Info() + if err != nil { + return nil, err + } + infos = append(infos, info) + } + return infos, nil } // Walk via filepath.Walk @@ -90,7 +103,7 @@ func (DefaultFs) Walk(root string, walkFn filepath.WalkFunc) error { // WriteFile via ioutil.Writefile func (DefaultFs) WriteFile(filename string, data []byte, perm os.FileMode) error { - return ioutil.WriteFile(filename, data, perm) + return os.WriteFile(filename, data, perm) } // defaultFile implements File using same-named functions from "os" diff --git a/go-controller/vendor/github.com/Mellanox/sriovnet/pkg/utils/filesystem/fakefs.go b/go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/pkg/utils/filesystem/fakefs.go similarity index 100% rename from go-controller/vendor/github.com/Mellanox/sriovnet/pkg/utils/filesystem/fakefs.go rename to go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/pkg/utils/filesystem/fakefs.go diff --git a/go-controller/vendor/github.com/Mellanox/sriovnet/pkg/utils/filesystem/filesystem.go b/go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/pkg/utils/filesystem/filesystem.go similarity index 100% rename from go-controller/vendor/github.com/Mellanox/sriovnet/pkg/utils/filesystem/filesystem.go rename to go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/pkg/utils/filesystem/filesystem.go diff --git a/go-controller/vendor/github.com/Mellanox/sriovnet/pkg/utils/netlinkops/netlinkops.go b/go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/pkg/utils/netlinkops/netlinkops.go similarity index 100% rename from go-controller/vendor/github.com/Mellanox/sriovnet/pkg/utils/netlinkops/netlinkops.go rename to 
go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/pkg/utils/netlinkops/netlinkops.go diff --git a/go-controller/vendor/github.com/Mellanox/sriovnet/sriovnet.go b/go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/sriovnet.go similarity index 83% rename from go-controller/vendor/github.com/Mellanox/sriovnet/sriovnet.go rename to go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/sriovnet.go index a3583e4298..0a961a7353 100644 --- a/go-controller/vendor/github.com/Mellanox/sriovnet/sriovnet.go +++ b/go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/sriovnet.go @@ -14,8 +14,8 @@ import ( "github.com/google/uuid" "github.com/vishvananda/netlink" - utilfs "github.com/Mellanox/sriovnet/pkg/utils/filesystem" - "github.com/Mellanox/sriovnet/pkg/utils/netlinkops" + utilfs "github.com/k8snetworkplumbingwg/sriovnet/pkg/utils/filesystem" + "github.com/k8snetworkplumbingwg/sriovnet/pkg/utils/netlinkops" ) const ( @@ -25,8 +25,9 @@ const ( ) var ( - virtFnRe = regexp.MustCompile(`virtfn(\d+)`) - pciAddressRe = regexp.MustCompile(`^[0-9a-f]{4}:[0-9a-f]{2}:[01][0-9a-f].[0-7]$`) + virtFnRe = regexp.MustCompile(`virtfn(\d+)`) + pciAddressRe = regexp.MustCompile(`^[0-9a-f]{4}:[0-9a-f]{2}:[01][0-9a-f].[0-7]$`) + auxiliaryDeviceRe = regexp.MustCompile(`^(\S+\.){2}\d+$`) ) type VfObj struct { @@ -432,6 +433,24 @@ func GetVfIndexByPciAddress(vfPciAddress string) (int, error) { return -1, fmt.Errorf("vf index for %s not found", vfPciAddress) } +// gets the PF index that's associated with a VF PCI address (e.g '0000:03:00.4') +func GetPfIndexByVfPciAddress(vfPciAddress string) (int, error) { + const pciParts = 4 + pfPciAddress, err := GetPfPciFromVfPci(vfPciAddress) + if err != nil { + return -1, err + } + var domain, bus, dev, fn int + parsed, err := fmt.Sscanf(pfPciAddress, "%04x:%02x:%02x.%d", &domain, &bus, &dev, &fn) + if err != nil { + return -1, fmt.Errorf("error trying to parse PF PCI address %s: %v", pfPciAddress, err) + } + if parsed != pciParts { + return -1, fmt.Errorf("failed to parse PF PCI address %s. Unexpected format", pfPciAddress) + } + return fn, err +} + // GetPfPciFromVfPci retrieves the parent PF PCI address of the provided VF PCI address in D:B:D.f format func GetPfPciFromVfPci(vfPciAddress string) (string, error) { pfPath := filepath.Join(PciSysDir, vfPciAddress, "physfn") @@ -453,3 +472,35 @@ func GetNetDevicesFromPci(pciAddress string) ([]string, error) { pciDir := filepath.Join(PciSysDir, pciAddress, "net") return getFileNamesFromPath(pciDir) } + +// GetPciFromNetDevice returns the PCI address associated with a network device name +func GetPciFromNetDevice(name string) (string, error) { + devPath := filepath.Join(NetSysDir, name) + + realPath, err := utilfs.Fs.Readlink(devPath) + if err != nil { + return "", fmt.Errorf("device %s not found: %s", name, err) + } + + parent := filepath.Dir(realPath) + base := filepath.Base(parent) + // Devices can have their PCI device sysfs entry at different levels: + // PF, VF, SF representor: + // /sys/devices/pci0000:00/.../0000:03:00.0/net/p0 + // /sys/devices/pci0000:00/.../0000:03:00.0/net/pf0hpf + // /sys/devices/pci0000:00/.../0000:03:00.0/net/pf0vf0 + // /sys/devices/pci0000:00/.../0000:03:00.0/net/pf0sf0 + // SF port: + // /sys/devices/pci0000:00/.../0000:03:00.0/mlx5_core.sf.3/net/enp3s0f0s1 + // This loop allows detecting any of them. 
+ for parent != "/" && !pciAddressRe.MatchString(base) { + parent = filepath.Dir(parent) + base = filepath.Base(parent) + } + // If we stopped on '/' and the base was never a proper PCI address, + // then 'netdev' is not a PCI device. + if !pciAddressRe.MatchString(base) { + return "", fmt.Errorf("device %s is not a PCI device: %s", name, realPath) + } + return base, nil +} diff --git a/go-controller/vendor/github.com/Mellanox/sriovnet/sriovnet_aux.go b/go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/sriovnet_aux.go similarity index 80% rename from go-controller/vendor/github.com/Mellanox/sriovnet/sriovnet_aux.go rename to go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/sriovnet_aux.go index f36329ed6b..a60061b3ee 100644 --- a/go-controller/vendor/github.com/Mellanox/sriovnet/sriovnet_aux.go +++ b/go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/sriovnet_aux.go @@ -25,7 +25,7 @@ import ( "strconv" "strings" - utilfs "github.com/Mellanox/sriovnet/pkg/utils/filesystem" + utilfs "github.com/k8snetworkplumbingwg/sriovnet/pkg/utils/filesystem" ) // GetNetDeviceFromAux gets auxiliary device name (e.g 'mlx5_core.sf.2') and @@ -87,3 +87,25 @@ func GetUplinkRepresentorFromAux(auxDev string) (string, error) { return GetUplinkRepresentor(pfPci) } + +// GetAuxNetDevicesFromPci returns a list of auxiliary devices names for the specified PCI network device +func GetAuxNetDevicesFromPci(pciAddr string) ([]string, error) { + baseDev := filepath.Join(PciSysDir, pciAddr) + // ensure that "net" folder exists, meaning it is network PCI device + if _, err := utilfs.Fs.Stat(filepath.Join(baseDev, "net")); err != nil { + return nil, err + } + + files, err := utilfs.Fs.ReadDir(baseDev) + if err != nil { + return nil, err + } + + auxDevs := make([]string, 0) + for _, file := range files { + if auxiliaryDeviceRe.MatchString(file.Name()) { + auxDevs = append(auxDevs, file.Name()) + } + } + return auxDevs, nil +} diff --git a/go-controller/vendor/github.com/Mellanox/sriovnet/sriovnet_helper.go b/go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/sriovnet_helper.go similarity index 100% rename from go-controller/vendor/github.com/Mellanox/sriovnet/sriovnet_helper.go rename to go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/sriovnet_helper.go diff --git a/go-controller/vendor/github.com/Mellanox/sriovnet/sriovnet_switchdev.go b/go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/sriovnet_switchdev.go similarity index 82% rename from go-controller/vendor/github.com/Mellanox/sriovnet/sriovnet_switchdev.go rename to go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/sriovnet_switchdev.go index f182cfe0ee..5ccf3fadc5 100644 --- a/go-controller/vendor/github.com/Mellanox/sriovnet/sriovnet_switchdev.go +++ b/go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/sriovnet_switchdev.go @@ -1,6 +1,7 @@ package sriovnet import ( + "bytes" "errors" "fmt" "net" @@ -10,8 +11,8 @@ import ( "strconv" "strings" - utilfs "github.com/Mellanox/sriovnet/pkg/utils/filesystem" - "github.com/Mellanox/sriovnet/pkg/utils/netlinkops" + utilfs "github.com/k8snetworkplumbingwg/sriovnet/pkg/utils/filesystem" + "github.com/k8snetworkplumbingwg/sriovnet/pkg/utils/netlinkops" ) const ( @@ -22,7 +23,7 @@ const ( type PortFlavour uint16 // Keep things consistent with netlink lib constants -// nolint:golint,stylecheck +// nolint:revive,stylecheck const ( PORT_FLAVOUR_PHYSICAL = iota PORT_FLAVOUR_CPU @@ -88,7 +89,7 @@ func isSwitchdev(netdevice string) bool { if 
err != nil { return false } - if physSwitchID != nil && string(physSwitchID) != "" { + if len(physSwitchID) != 0 { return true } return false @@ -126,7 +127,7 @@ func GetUplinkRepresentor(pciAddress string) (string, error) { func GetVfRepresentor(uplink string, vfIndex int) (string, error) { swIDFile := filepath.Join(NetSysDir, uplink, netdevPhysSwitchID) physSwitchID, err := utilfs.Fs.ReadFile(swIDFile) - if err != nil || string(physSwitchID) == "" { + if err != nil || len(physSwitchID) == 0 { return "", fmt.Errorf("cant get uplink %s switch id", uplink) } @@ -139,7 +140,7 @@ func GetVfRepresentor(uplink string, vfIndex int) (string, error) { devicePath := filepath.Join(NetSysDir, device.Name()) deviceSwIDFile := filepath.Join(devicePath, netdevPhysSwitchID) deviceSwID, err := utilfs.Fs.ReadFile(deviceSwIDFile) - if err != nil || string(deviceSwID) != string(physSwitchID) { + if err != nil || !bytes.Equal(deviceSwID, physSwitchID) { continue } physPortNameStr, err := getNetDevPhysPortName(device.Name()) @@ -226,6 +227,40 @@ func findNetdevWithPortNameCriteria(criteria func(string) bool) (string, error) return "", fmt.Errorf("no representor matched criteria") } +// GetPortIndexFromRepresentor finds the index of a representor from its network device name. +// Supports VF and SF. For multiple port flavors, the same ID could be returned, i.e. +// +// pf0vf10 and pf0sf10 +// +// will return the same port ID. To further differentiate the ports, use GetRepresentorPortFlavour +func GetPortIndexFromRepresentor(repNetDev string) (int, error) { + flavor, err := GetRepresentorPortFlavour(repNetDev) + if err != nil { + return 0, err + } + + if flavor != PORT_FLAVOUR_PCI_VF && flavor != PORT_FLAVOUR_PCI_SF { + return 0, fmt.Errorf("unsupported port flavor for netdev %s", repNetDev) + } + + physPortName, err := getNetDevPhysPortName(repNetDev) + if err != nil { + return 0, fmt.Errorf("failed to get device %s physical port name: %v", repNetDev, err) + } + + typeToRegex := map[PortFlavour]*regexp.Regexp{ + PORT_FLAVOUR_PCI_VF: vfPortRepRegex, + PORT_FLAVOUR_PCI_SF: sfPortRepRegex, + } + + _, repIndex, err := parseIndexFromPhysPortName(physPortName, typeToRegex[flavor]) + if err != nil { + return 0, fmt.Errorf("failed to parse the physical port name of device %s: %v", repNetDev, err) + } + + return repIndex, nil +} + // GetVfRepresentorDPU returns VF representor on DPU for a host VF identified by pfID and vfIndex func GetVfRepresentorDPU(pfID, vfIndex string) (string, error) { // TODO(Adrianc): This method should change to get switchID and vfIndex as input, then common logic can @@ -262,6 +297,39 @@ func GetVfRepresentorDPU(pfID, vfIndex string) (string, error) { return netdev, nil } +// GetSfRepresentorDPU returns SF representor on DPU for a host SF identified by pfID and sfIndex +func GetSfRepresentorDPU(pfID, sfIndex string) (string, error) { + // pfID should be 0 or 1 + if pfID != "0" && pfID != "1" { + return "", fmt.Errorf("unexpected pfID(%s). It should be 0 or 1", pfID) + } + + // sfIndex should be an unsigned integer provided as a decimal number + if _, err := strconv.ParseUint(sfIndex, 10, 32); err != nil { + return "", fmt.Errorf("unexpected sfIndex(%s). It should be an unsigned decimal number", sfIndex) + } + + // map for easy search of expected VF rep port name. 
+ // Note: no support for Multi-Chassis DPUs + expectedPhysPortNames := map[string]interface{}{ + fmt.Sprintf("pf%ssf%s", pfID, sfIndex): nil, + fmt.Sprintf("c1pf%ssf%s", pfID, sfIndex): nil, + } + + netdev, err := findNetdevWithPortNameCriteria(func(portName string) bool { + // if phys port name == pfsf or c1pfsf we have a match + if _, ok := expectedPhysPortNames[portName]; ok { + return true + } + return false + }) + + if err != nil { + return "", fmt.Errorf("sf representor for pfID:%s, sfIndex:%s not found", pfID, sfIndex) + } + return netdev, nil +} + // GetRepresentorPortFlavour returns the representor port flavour // Note: this method does not support old representor names used by old kernels // e.g and will return PORT_FLAVOUR_UNKNOWN for such cases. @@ -287,6 +355,7 @@ func GetRepresentorPortFlavour(netdev string) (PortFlavour, error) { PORT_FLAVOUR_PHYSICAL: physPortRepRegex, PORT_FLAVOUR_PCI_PF: pfPortRepRegex, PORT_FLAVOUR_PCI_VF: vfPortRepRegex, + PORT_FLAVOUR_PCI_SF: sfPortRepRegex, } for flavour, regex := range typeToRegex { if regex.MatchString(portName) { @@ -300,9 +369,11 @@ func GetRepresentorPortFlavour(netdev string) (PortFlavour, error) { // representor port. The format of the file is a set of : pairs as follows: // // ``` -// MAC : 0c:42:a1:c6:cf:7c -// MaxTxRate : 0 -// State : Follow +// +// MAC : 0c:42:a1:c6:cf:7c +// MaxTxRate : 0 +// State : Follow +// // ``` func parseDPUConfigFileOutput(out string) map[string]string { configMap := make(map[string]string) @@ -320,8 +391,9 @@ func parseDPUConfigFileOutput(out string) map[string]string { // GetRepresentorPeerMacAddress returns the MAC address of the peer netdev associated with the given // representor netdev // Note: -// This method functionality is currently supported only on DPUs. -// Currently only netdev representors with PORT_FLAVOUR_PCI_PF are supported +// +// This method functionality is currently supported only on DPUs. +// Currently only netdev representors with PORT_FLAVOUR_PCI_PF are supported func GetRepresentorPeerMacAddress(netdev string) (net.HardwareAddr, error) { flavor, err := GetRepresentorPortFlavour(netdev) if err != nil { diff --git a/go-controller/vendor/github.com/Mellanox/sriovnet/utils.go b/go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/utils.go similarity index 94% rename from go-controller/vendor/github.com/Mellanox/sriovnet/utils.go rename to go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/utils.go index c4ee663880..84772da95b 100644 --- a/go-controller/vendor/github.com/Mellanox/sriovnet/utils.go +++ b/go-controller/vendor/github.com/k8snetworkplumbingwg/sriovnet/utils.go @@ -23,7 +23,7 @@ import ( "fmt" "strings" - utilfs "github.com/Mellanox/sriovnet/pkg/utils/filesystem" + utilfs "github.com/k8snetworkplumbingwg/sriovnet/pkg/utils/filesystem" ) func getFileNamesFromPath(dir string) ([]string, error) { diff --git a/go-controller/vendor/github.com/spf13/afero/.travis.yml b/go-controller/vendor/github.com/spf13/afero/.travis.yml deleted file mode 100644 index e944f59474..0000000000 --- a/go-controller/vendor/github.com/spf13/afero/.travis.yml +++ /dev/null @@ -1,26 +0,0 @@ -sudo: false -language: go -arch: - - amd64 - - ppc64e - -go: - - "1.14" - - "1.15" - - "1.16" - - tip - -os: - - linux - - osx - -matrix: - allow_failures: - - go: tip - fast_finish: true - -script: - - go build -v ./... - - go test -count=1 -cover -race -v ./... - - go vet ./... - - FILES=$(gofmt -s -l . 
zipfs sftpfs mem tarfs); if [[ -n "${FILES}" ]]; then echo "You have go format errors; gofmt your changes"; exit 1; fi diff --git a/go-controller/vendor/github.com/spf13/afero/README.md b/go-controller/vendor/github.com/spf13/afero/README.md index fb8eaaf896..3bafbfdfca 100644 --- a/go-controller/vendor/github.com/spf13/afero/README.md +++ b/go-controller/vendor/github.com/spf13/afero/README.md @@ -2,7 +2,7 @@ A FileSystem Abstraction System for Go -[![Build Status](https://travis-ci.org/spf13/afero.svg)](https://travis-ci.org/spf13/afero) [![Build status](https://ci.appveyor.com/api/projects/status/github/spf13/afero?branch=master&svg=true)](https://ci.appveyor.com/project/spf13/afero) [![GoDoc](https://godoc.org/github.com/spf13/afero?status.svg)](https://godoc.org/github.com/spf13/afero) [![Join the chat at https://gitter.im/spf13/afero](https://badges.gitter.im/Dev%20Chat.svg)](https://gitter.im/spf13/afero?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) +[![Test](https://github.com/spf13/afero/actions/workflows/test.yml/badge.svg)](https://github.com/spf13/afero/actions/workflows/test.yml) [![GoDoc](https://godoc.org/github.com/spf13/afero?status.svg)](https://godoc.org/github.com/spf13/afero) [![Join the chat at https://gitter.im/spf13/afero](https://badges.gitter.im/Dev%20Chat.svg)](https://gitter.im/spf13/afero?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) # Overview @@ -79,11 +79,11 @@ would. So if my application before had: ```go -os.Open('/tmp/foo') +os.Open("/tmp/foo") ``` We would replace it with: ```go -AppFs.Open('/tmp/foo') +AppFs.Open("/tmp/foo") ``` `AppFs` being the variable we defined above. @@ -259,6 +259,18 @@ system using InMemoryFile. Afero has experimental support for secure file transfer protocol (sftp). Which can be used to perform file operations over a encrypted channel. +### GCSFs + +Afero has experimental support for Google Cloud Storage (GCS). You can either set the +`GOOGLE_APPLICATION_CREDENTIALS_JSON` env variable to your JSON credentials or use `opts` in +`NewGcsFS` to configure access to your GCS bucket. + +Some known limitations of the existing implementation: +* No Chmod support - The GCS ACL could probably be mapped to *nix style permissions but that would add another level of complexity and is ignored in this version. +* No Chtimes support - Could be simulated with attributes (gcs a/m-times are set implicitly) but that's is left for another version. +* Not thread safe - Also assumes all file operations are done through the same instance of the GcsFs. File operations between different GcsFs instances are not guaranteed to be consistent. + + ## Filtering Backends ### BasePathFs diff --git a/go-controller/vendor/github.com/spf13/afero/afero.go b/go-controller/vendor/github.com/spf13/afero/afero.go index 469ff7d2dc..39f6585209 100644 --- a/go-controller/vendor/github.com/spf13/afero/afero.go +++ b/go-controller/vendor/github.com/spf13/afero/afero.go @@ -97,14 +97,14 @@ type Fs interface { // Chown changes the uid and gid of the named file. 
Chown(name string, uid, gid int) error - //Chtimes changes the access and modification times of the named file + // Chtimes changes the access and modification times of the named file Chtimes(name string, atime time.Time, mtime time.Time) error } var ( ErrFileClosed = errors.New("File is closed") - ErrOutOfRange = errors.New("Out of range") - ErrTooLarge = errors.New("Too large") + ErrOutOfRange = errors.New("out of range") + ErrTooLarge = errors.New("too large") ErrFileNotFound = os.ErrNotExist ErrFileExists = os.ErrExist ErrDestinationExists = os.ErrExist diff --git a/go-controller/vendor/github.com/spf13/afero/appveyor.yml b/go-controller/vendor/github.com/spf13/afero/appveyor.yml index 5d2f34bf16..65e20e8ca3 100644 --- a/go-controller/vendor/github.com/spf13/afero/appveyor.yml +++ b/go-controller/vendor/github.com/spf13/afero/appveyor.yml @@ -1,3 +1,5 @@ +# This currently does nothing. We have moved to GitHub action, but this is kept +# until spf13 has disabled this project in AppVeyor. version: '{build}' clone_folder: C:\gopath\src\github.com\spf13\afero environment: @@ -6,10 +8,3 @@ build_script: - cmd: >- go version - go env - - go get -v github.com/spf13/afero/... - - go build -v github.com/spf13/afero/... -test_script: -- cmd: go test -count=1 -cover -race -v github.com/spf13/afero/... diff --git a/go-controller/vendor/github.com/spf13/afero/basepath.go b/go-controller/vendor/github.com/spf13/afero/basepath.go index 4f9832829d..2e72793a3e 100644 --- a/go-controller/vendor/github.com/spf13/afero/basepath.go +++ b/go-controller/vendor/github.com/spf13/afero/basepath.go @@ -1,6 +1,7 @@ package afero import ( + "io/fs" "os" "path/filepath" "runtime" @@ -8,7 +9,10 @@ import ( "time" ) -var _ Lstater = (*BasePathFs)(nil) +var ( + _ Lstater = (*BasePathFs)(nil) + _ fs.ReadDirFile = (*BasePathFile)(nil) +) // The BasePathFs restricts all operations to a given path within an Fs. 
// The given file name to the operations on this Fs will be prepended with @@ -33,6 +37,13 @@ func (f *BasePathFile) Name() string { return strings.TrimPrefix(sourcename, filepath.Clean(f.path)) } +func (f *BasePathFile) ReadDir(n int) ([]fs.DirEntry, error) { + if rdf, ok := f.File.(fs.ReadDirFile); ok { + return rdf.ReadDir(n) + } + return readDirFile{f.File}.ReadDir(n) +} + func NewBasePathFs(source Fs, path string) Fs { return &BasePathFs{source: source, path: path} } diff --git a/go-controller/vendor/github.com/spf13/afero/cacheOnReadFs.go b/go-controller/vendor/github.com/spf13/afero/cacheOnReadFs.go index 71471aa25c..017d344fd5 100644 --- a/go-controller/vendor/github.com/spf13/afero/cacheOnReadFs.go +++ b/go-controller/vendor/github.com/spf13/afero/cacheOnReadFs.go @@ -75,6 +75,10 @@ func (u *CacheOnReadFs) copyToLayer(name string) error { return copyToLayer(u.base, u.layer, name) } +func (u *CacheOnReadFs) copyFileToLayer(name string, flag int, perm os.FileMode) error { + return copyFileToLayer(u.base, u.layer, name, flag, perm) +} + func (u *CacheOnReadFs) Chtimes(name string, atime, mtime time.Time) error { st, _, err := u.cacheStatus(name) if err != nil { @@ -212,7 +216,7 @@ func (u *CacheOnReadFs) OpenFile(name string, flag int, perm os.FileMode) (File, switch st { case cacheLocal, cacheHit: default: - if err := u.copyToLayer(name); err != nil { + if err := u.copyFileToLayer(name, flag, perm); err != nil { return nil, err } } diff --git a/go-controller/vendor/github.com/spf13/afero/const_bsds.go b/go-controller/vendor/github.com/spf13/afero/const_bsds.go index 18b45824be..eed0f225fd 100644 --- a/go-controller/vendor/github.com/spf13/afero/const_bsds.go +++ b/go-controller/vendor/github.com/spf13/afero/const_bsds.go @@ -11,6 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build aix || darwin || openbsd || freebsd || netbsd || dragonfly // +build aix darwin openbsd freebsd netbsd dragonfly package afero diff --git a/go-controller/vendor/github.com/spf13/afero/const_win_unix.go b/go-controller/vendor/github.com/spf13/afero/const_win_unix.go index 2b850e4ddb..004d57e2ff 100644 --- a/go-controller/vendor/github.com/spf13/afero/const_win_unix.go +++ b/go-controller/vendor/github.com/spf13/afero/const_win_unix.go @@ -10,12 +10,8 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-// +build !darwin -// +build !openbsd -// +build !freebsd -// +build !dragonfly -// +build !netbsd -// +build !aix +//go:build !darwin && !openbsd && !freebsd && !dragonfly && !netbsd && !aix +// +build !darwin,!openbsd,!freebsd,!dragonfly,!netbsd,!aix package afero diff --git a/go-controller/vendor/github.com/spf13/afero/copyOnWriteFs.go b/go-controller/vendor/github.com/spf13/afero/copyOnWriteFs.go index 6ff8f3099a..184d6dd702 100644 --- a/go-controller/vendor/github.com/spf13/afero/copyOnWriteFs.go +++ b/go-controller/vendor/github.com/spf13/afero/copyOnWriteFs.go @@ -223,7 +223,7 @@ func (u *CopyOnWriteFs) OpenFile(name string, flag int, perm os.FileMode) (File, return nil, err } if isaDir { - if err = u.layer.MkdirAll(dir, 0777); err != nil { + if err = u.layer.MkdirAll(dir, 0o777); err != nil { return nil, err } return u.layer.OpenFile(name, flag, perm) @@ -247,8 +247,9 @@ func (u *CopyOnWriteFs) OpenFile(name string, flag int, perm os.FileMode) (File, // This function handles the 9 different possibilities caused // by the union which are the intersection of the following... -// layer: doesn't exist, exists as a file, and exists as a directory -// base: doesn't exist, exists as a file, and exists as a directory +// +// layer: doesn't exist, exists as a file, and exists as a directory +// base: doesn't exist, exists as a file, and exists as a directory func (u *CopyOnWriteFs) Open(name string) (File, error) { // Since the overlay overrides the base we check that first b, err := u.isBaseFile(name) @@ -322,5 +323,5 @@ func (u *CopyOnWriteFs) MkdirAll(name string, perm os.FileMode) error { } func (u *CopyOnWriteFs) Create(name string) (File, error) { - return u.OpenFile(name, os.O_CREATE|os.O_TRUNC|os.O_RDWR, 0666) + return u.OpenFile(name, os.O_CREATE|os.O_TRUNC|os.O_RDWR, 0o666) } diff --git a/go-controller/vendor/github.com/spf13/afero/httpFs.go b/go-controller/vendor/github.com/spf13/afero/httpFs.go index 2b86e30d1e..ac0de6d51f 100644 --- a/go-controller/vendor/github.com/spf13/afero/httpFs.go +++ b/go-controller/vendor/github.com/spf13/afero/httpFs.go @@ -29,7 +29,7 @@ type httpDir struct { } func (d httpDir) Open(name string) (http.File, error) { - if filepath.Separator != '/' && strings.IndexRune(name, filepath.Separator) >= 0 || + if filepath.Separator != '/' && strings.ContainsRune(name, filepath.Separator) || strings.Contains(name, "\x00") { return nil, errors.New("http: invalid character in file path") } diff --git a/go-controller/vendor/github.com/spf13/afero/internal/common/adapters.go b/go-controller/vendor/github.com/spf13/afero/internal/common/adapters.go new file mode 100644 index 0000000000..60685caa54 --- /dev/null +++ b/go-controller/vendor/github.com/spf13/afero/internal/common/adapters.go @@ -0,0 +1,27 @@ +// Copyright © 2022 Steve Francia . +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package common + +import "io/fs" + +// FileInfoDirEntry provides an adapter from os.FileInfo to fs.DirEntry +type FileInfoDirEntry struct { + fs.FileInfo +} + +var _ fs.DirEntry = FileInfoDirEntry{} + +func (d FileInfoDirEntry) Type() fs.FileMode { return d.FileInfo.Mode().Type() } + +func (d FileInfoDirEntry) Info() (fs.FileInfo, error) { return d.FileInfo, nil } diff --git a/go-controller/vendor/github.com/spf13/afero/iofs.go b/go-controller/vendor/github.com/spf13/afero/iofs.go index c80345536d..938b9316e6 100644 --- a/go-controller/vendor/github.com/spf13/afero/iofs.go +++ b/go-controller/vendor/github.com/spf13/afero/iofs.go @@ -1,3 +1,4 @@ +//go:build go1.16 // +build go1.16 package afero @@ -7,7 +8,10 @@ import ( "io/fs" "os" "path" + "sort" "time" + + "github.com/spf13/afero/internal/common" ) // IOFS adopts afero.Fs to stdlib io/fs.FS @@ -66,14 +70,31 @@ func (iofs IOFS) Glob(pattern string) ([]string, error) { } func (iofs IOFS) ReadDir(name string) ([]fs.DirEntry, error) { - items, err := ReadDir(iofs.Fs, name) + f, err := iofs.Fs.Open(name) if err != nil { return nil, iofs.wrapError("readdir", name, err) } + defer f.Close() + + if rdf, ok := f.(fs.ReadDirFile); ok { + items, err := rdf.ReadDir(-1) + if err != nil { + return nil, iofs.wrapError("readdir", name, err) + } + sort.Slice(items, func(i, j int) bool { return items[i].Name() < items[j].Name() }) + return items, nil + } + + items, err := f.Readdir(-1) + if err != nil { + return nil, iofs.wrapError("readdir", name, err) + } + sort.Sort(byName(items)) + ret := make([]fs.DirEntry, len(items)) for i := range items { - ret[i] = dirEntry{items[i]} + ret[i] = common.FileInfoDirEntry{FileInfo: items[i]} } return ret, nil @@ -108,17 +129,6 @@ func (IOFS) wrapError(op, path string, err error) error { } } -// dirEntry provides adapter from os.FileInfo to fs.DirEntry -type dirEntry struct { - fs.FileInfo -} - -var _ fs.DirEntry = dirEntry{} - -func (d dirEntry) Type() fs.FileMode { return d.FileInfo.Mode().Type() } - -func (d dirEntry) Info() (fs.FileInfo, error) { return d.FileInfo, nil } - // readDirFile provides adapter from afero.File to fs.ReadDirFile needed for correct Open type readDirFile struct { File @@ -134,7 +144,7 @@ func (r readDirFile) ReadDir(n int) ([]fs.DirEntry, error) { ret := make([]fs.DirEntry, len(items)) for i := range items { - ret[i] = dirEntry{items[i]} + ret[i] = common.FileInfoDirEntry{FileInfo: items[i]} } return ret, nil diff --git a/go-controller/vendor/github.com/spf13/afero/ioutil.go b/go-controller/vendor/github.com/spf13/afero/ioutil.go index a403133e27..fa6abe1eee 100644 --- a/go-controller/vendor/github.com/spf13/afero/ioutil.go +++ b/go-controller/vendor/github.com/spf13/afero/ioutil.go @@ -141,8 +141,10 @@ func WriteFile(fs Fs, filename string, data []byte, perm os.FileMode) error { // We generate random temporary file names so that there's a good // chance the file doesn't exist yet - keeps the number of tries in // TempFile to a minimum. 
-var rand uint32 -var randmu sync.Mutex +var ( + randNum uint32 + randmu sync.Mutex +) func reseed() uint32 { return uint32(time.Now().UnixNano() + int64(os.Getpid())) @@ -150,12 +152,12 @@ func reseed() uint32 { func nextRandom() string { randmu.Lock() - r := rand + r := randNum if r == 0 { r = reseed() } r = r*1664525 + 1013904223 // constants from Numerical Recipes - rand = r + randNum = r randmu.Unlock() return strconv.Itoa(int(1e9 + r%1e9))[1:] } @@ -190,11 +192,11 @@ func TempFile(fs Fs, dir, pattern string) (f File, err error) { nconflict := 0 for i := 0; i < 10000; i++ { name := filepath.Join(dir, prefix+nextRandom()+suffix) - f, err = fs.OpenFile(name, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0600) + f, err = fs.OpenFile(name, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0o600) if os.IsExist(err) { if nconflict++; nconflict > 10 { randmu.Lock() - rand = reseed() + randNum = reseed() randmu.Unlock() } continue @@ -214,6 +216,7 @@ func TempFile(fs Fs, dir, pattern string) (f File, err error) { func (a Afero) TempDir(dir, prefix string) (name string, err error) { return TempDir(a.Fs, dir, prefix) } + func TempDir(fs Fs, dir, prefix string) (name string, err error) { if dir == "" { dir = os.TempDir() @@ -222,11 +225,11 @@ func TempDir(fs Fs, dir, prefix string) (name string, err error) { nconflict := 0 for i := 0; i < 10000; i++ { try := filepath.Join(dir, prefix+nextRandom()) - err = fs.Mkdir(try, 0700) + err = fs.Mkdir(try, 0o700) if os.IsExist(err) { if nconflict++; nconflict > 10 { randmu.Lock() - rand = reseed() + randNum = reseed() randmu.Unlock() } continue diff --git a/go-controller/vendor/github.com/spf13/afero/mem/file.go b/go-controller/vendor/github.com/spf13/afero/mem/file.go index 5a20730c2f..62fe4498e1 100644 --- a/go-controller/vendor/github.com/spf13/afero/mem/file.go +++ b/go-controller/vendor/github.com/spf13/afero/mem/file.go @@ -18,15 +18,20 @@ import ( "bytes" "errors" "io" + "io/fs" "os" "path/filepath" "sync" "sync/atomic" "time" + + "github.com/spf13/afero/internal/common" ) const FilePathSeparator = string(filepath.Separator) +var _ fs.ReadDirFile = &File{} + type File struct { // atomic requires 64-bit alignment for struct field access at int64 @@ -71,7 +76,7 @@ func CreateFile(name string) *FileData { } func CreateDir(name string) *FileData { - return &FileData{name: name, memDir: &DirMap{}, dir: true} + return &FileData{name: name, memDir: &DirMap{}, dir: true, modtime: time.Now()} } func ChangeFileName(f *FileData, newname string) { @@ -183,10 +188,23 @@ func (f *File) Readdirnames(n int) (names []string, err error) { return names, err } +// Implements fs.ReadDirFile +func (f *File) ReadDir(n int) ([]fs.DirEntry, error) { + fi, err := f.Readdir(n) + if err != nil { + return nil, err + } + di := make([]fs.DirEntry, len(fi)) + for i, f := range fi { + di[i] = common.FileInfoDirEntry{FileInfo: f} + } + return di, nil +} + func (f *File) Read(b []byte) (n int, err error) { f.fileData.Lock() defer f.fileData.Unlock() - if f.closed == true { + if f.closed { return 0, ErrFileClosed } if len(b) > 0 && int(f.at) == len(f.fileData.data) { @@ -214,7 +232,7 @@ func (f *File) ReadAt(b []byte, off int64) (n int, err error) { } func (f *File) Truncate(size int64) error { - if f.closed == true { + if f.closed { return ErrFileClosed } if f.readOnly { @@ -227,7 +245,7 @@ func (f *File) Truncate(size int64) error { defer f.fileData.Unlock() if size > int64(len(f.fileData.data)) { diff := size - int64(len(f.fileData.data)) - f.fileData.data = append(f.fileData.data, bytes.Repeat([]byte{00}, 
int(diff))...) + f.fileData.data = append(f.fileData.data, bytes.Repeat([]byte{0o0}, int(diff))...) } else { f.fileData.data = f.fileData.data[0:size] } @@ -236,7 +254,7 @@ func (f *File) Truncate(size int64) error { } func (f *File) Seek(offset int64, whence int) (int64, error) { - if f.closed == true { + if f.closed { return 0, ErrFileClosed } switch whence { @@ -251,7 +269,7 @@ func (f *File) Seek(offset int64, whence int) (int64, error) { } func (f *File) Write(b []byte) (n int, err error) { - if f.closed == true { + if f.closed { return 0, ErrFileClosed } if f.readOnly { @@ -267,7 +285,7 @@ func (f *File) Write(b []byte) (n int, err error) { tail = f.fileData.data[n+int(cur):] } if diff > 0 { - f.fileData.data = append(f.fileData.data, append(bytes.Repeat([]byte{00}, int(diff)), b...)...) + f.fileData.data = append(f.fileData.data, append(bytes.Repeat([]byte{0o0}, int(diff)), b...)...) f.fileData.data = append(f.fileData.data, tail...) } else { f.fileData.data = append(f.fileData.data[:cur], b...) @@ -303,16 +321,19 @@ func (s *FileInfo) Name() string { s.Unlock() return name } + func (s *FileInfo) Mode() os.FileMode { s.Lock() defer s.Unlock() return s.mode } + func (s *FileInfo) ModTime() time.Time { s.Lock() defer s.Unlock() return s.modtime } + func (s *FileInfo) IsDir() bool { s.Lock() defer s.Unlock() @@ -330,8 +351,8 @@ func (s *FileInfo) Size() int64 { var ( ErrFileClosed = errors.New("File is closed") - ErrOutOfRange = errors.New("Out of range") - ErrTooLarge = errors.New("Too large") + ErrOutOfRange = errors.New("out of range") + ErrTooLarge = errors.New("too large") ErrFileNotFound = os.ErrNotExist ErrFileExists = os.ErrExist ErrDestinationExists = os.ErrExist diff --git a/go-controller/vendor/github.com/spf13/afero/memmap.go b/go-controller/vendor/github.com/spf13/afero/memmap.go index 5c265f92b2..e6b7d70b94 100644 --- a/go-controller/vendor/github.com/spf13/afero/memmap.go +++ b/go-controller/vendor/github.com/spf13/afero/memmap.go @@ -15,6 +15,7 @@ package afero import ( "fmt" + "io" "log" "os" "path/filepath" @@ -43,7 +44,7 @@ func (m *MemMapFs) getData() map[string]*mem.FileData { // Root should always exist, right? // TODO: what about windows? root := mem.CreateDir(FilePathSeparator) - mem.SetMode(root, os.ModeDir|0755) + mem.SetMode(root, os.ModeDir|0o755) m.data[FilePathSeparator] = root }) return m.data @@ -96,12 +97,12 @@ func (m *MemMapFs) registerWithParent(f *mem.FileData, perm os.FileMode) { pdir := filepath.Dir(filepath.Clean(f.Name())) err := m.lockfreeMkdir(pdir, perm) if err != nil { - //log.Println("Mkdir error:", err) + // log.Println("Mkdir error:", err) return } parent, err = m.lockfreeOpen(pdir) if err != nil { - //log.Println("Open after Mkdir error:", err) + // log.Println("Open after Mkdir error:", err) return } } @@ -142,6 +143,11 @@ func (m *MemMapFs) Mkdir(name string, perm os.FileMode) error { } m.mu.Lock() + // Double check that it doesn't exist. 
+ if _, ok := m.getData()[name]; ok { + m.mu.Unlock() + return &os.PathError{Op: "mkdir", Path: name, Err: ErrFileExists} + } item := mem.CreateDir(name) mem.SetMode(item, os.ModeDir|perm) m.getData()[name] = item @@ -232,7 +238,7 @@ func (m *MemMapFs) OpenFile(name string, flag int, perm os.FileMode) (File, erro file = mem.NewReadOnlyFileHandle(file.(*mem.File).Data()) } if flag&os.O_APPEND > 0 { - _, err = file.Seek(0, os.SEEK_END) + _, err = file.Seek(0, io.SeekEnd) if err != nil { file.Close() return nil, err @@ -279,7 +285,7 @@ func (m *MemMapFs) RemoveAll(path string) error { defer m.mu.RUnlock() for p := range m.getData() { - if strings.HasPrefix(p, path) { + if p == path || strings.HasPrefix(p, path+FilePathSeparator) { m.mu.RUnlock() m.mu.Lock() delete(m.getData(), p) @@ -314,6 +320,18 @@ func (m *MemMapFs) Rename(oldname, newname string) error { } else { return &os.PathError{Op: "rename", Path: oldname, Err: ErrFileNotFound} } + + for p, fileData := range m.getData() { + if strings.HasPrefix(p, oldname+FilePathSeparator) { + m.mu.RUnlock() + m.mu.Lock() + delete(m.getData(), p) + p := strings.Replace(p, oldname, newname, 1) + m.getData()[p] = fileData + m.mu.Unlock() + m.mu.RLock() + } + } return nil } diff --git a/go-controller/vendor/github.com/spf13/afero/regexpfs.go b/go-controller/vendor/github.com/spf13/afero/regexpfs.go index ac359c62a0..218f3b235b 100644 --- a/go-controller/vendor/github.com/spf13/afero/regexpfs.go +++ b/go-controller/vendor/github.com/spf13/afero/regexpfs.go @@ -10,7 +10,6 @@ import ( // The RegexpFs filters files (not directories) by regular expression. Only // files matching the given regexp will be allowed, all others get a ENOENT error ( // "No such file or directory"). -// type RegexpFs struct { re *regexp.Regexp source Fs diff --git a/go-controller/vendor/github.com/spf13/afero/symlink.go b/go-controller/vendor/github.com/spf13/afero/symlink.go index d1c6ea53d9..aa6ae125b6 100644 --- a/go-controller/vendor/github.com/spf13/afero/symlink.go +++ b/go-controller/vendor/github.com/spf13/afero/symlink.go @@ -21,9 +21,9 @@ import ( // filesystems saying so. 
// It indicates support for 3 symlink related interfaces that implement the // behaviors of the os methods: -// - Lstat -// - Symlink, and -// - Readlink +// - Lstat +// - Symlink, and +// - Readlink type Symlinker interface { Lstater Linker diff --git a/go-controller/vendor/github.com/spf13/afero/unionFile.go b/go-controller/vendor/github.com/spf13/afero/unionFile.go index 985363eea7..62dd6c93c8 100644 --- a/go-controller/vendor/github.com/spf13/afero/unionFile.go +++ b/go-controller/vendor/github.com/spf13/afero/unionFile.go @@ -47,7 +47,7 @@ func (f *UnionFile) Read(s []byte) (int, error) { if (err == nil || err == io.EOF) && f.Base != nil { // advance the file position also in the base file, the next // call may be a write at this position (or a seek with SEEK_CUR) - if _, seekErr := f.Base.Seek(int64(n), os.SEEK_CUR); seekErr != nil { + if _, seekErr := f.Base.Seek(int64(n), io.SeekCurrent); seekErr != nil { // only overwrite err in case the seek fails: we need to // report an eventual io.EOF to the caller err = seekErr @@ -65,7 +65,7 @@ func (f *UnionFile) ReadAt(s []byte, o int64) (int, error) { if f.Layer != nil { n, err := f.Layer.ReadAt(s, o) if (err == nil || err == io.EOF) && f.Base != nil { - _, err = f.Base.Seek(o+int64(n), os.SEEK_SET) + _, err = f.Base.Seek(o+int64(n), io.SeekStart) } return n, err } @@ -130,7 +130,7 @@ func (f *UnionFile) Name() string { type DirsMerger func(lofi, bofi []os.FileInfo) ([]os.FileInfo, error) var defaultUnionMergeDirsFn = func(lofi, bofi []os.FileInfo) ([]os.FileInfo, error) { - var files = make(map[string]os.FileInfo) + files := make(map[string]os.FileInfo) for _, fi := range lofi { files[fi.Name()] = fi @@ -151,7 +151,6 @@ var defaultUnionMergeDirsFn = func(lofi, bofi []os.FileInfo) ([]os.FileInfo, err } return rfi, nil - } // Readdir will weave the two directories together and @@ -268,20 +267,14 @@ func (f *UnionFile) WriteString(s string) (n int, err error) { return 0, BADFD } -func copyToLayer(base Fs, layer Fs, name string) error { - bfh, err := base.Open(name) - if err != nil { - return err - } - defer bfh.Close() - +func copyFile(base Fs, layer Fs, name string, bfh File) error { // First make sure the directory exists exists, err := Exists(layer, filepath.Dir(name)) if err != nil { return err } if !exists { - err = layer.MkdirAll(filepath.Dir(name), 0777) // FIXME? + err = layer.MkdirAll(filepath.Dir(name), 0o777) // FIXME? 
if err != nil { return err } @@ -315,3 +308,23 @@ func copyToLayer(base Fs, layer Fs, name string) error { } return layer.Chtimes(name, bfi.ModTime(), bfi.ModTime()) } + +func copyToLayer(base Fs, layer Fs, name string) error { + bfh, err := base.Open(name) + if err != nil { + return err + } + defer bfh.Close() + + return copyFile(base, layer, name, bfh) +} + +func copyFileToLayer(base Fs, layer Fs, name string, flag int, perm os.FileMode) error { + bfh, err := base.OpenFile(name, flag, perm) + if err != nil { + return err + } + defer bfh.Close() + + return copyFile(base, layer, name, bfh) +} diff --git a/go-controller/vendor/github.com/spf13/afero/util.go b/go-controller/vendor/github.com/spf13/afero/util.go index 4f253f481e..9e4cba2746 100644 --- a/go-controller/vendor/github.com/spf13/afero/util.go +++ b/go-controller/vendor/github.com/spf13/afero/util.go @@ -25,6 +25,7 @@ import ( "strings" "unicode" + "golang.org/x/text/runes" "golang.org/x/text/transform" "golang.org/x/text/unicode/norm" ) @@ -42,7 +43,7 @@ func WriteReader(fs Fs, path string, r io.Reader) (err error) { ospath := filepath.FromSlash(dir) if ospath != "" { - err = fs.MkdirAll(ospath, 0777) // rwx, rw, r + err = fs.MkdirAll(ospath, 0o777) // rwx, rw, r if err != nil { if err != os.ErrExist { return err @@ -70,7 +71,7 @@ func SafeWriteReader(fs Fs, path string, r io.Reader) (err error) { ospath := filepath.FromSlash(dir) if ospath != "" { - err = fs.MkdirAll(ospath, 0777) // rwx, rw, r + err = fs.MkdirAll(ospath, 0o777) // rwx, rw, r if err != nil { return } @@ -123,7 +124,7 @@ func GetTempDir(fs Fs, subPath string) string { return addSlash(dir) } - err := fs.MkdirAll(dir, 0777) + err := fs.MkdirAll(dir, 0o777) if err != nil { panic(err) } @@ -158,16 +159,12 @@ func UnicodeSanitize(s string) string { // Transform characters with accents into plain forms. func NeuterAccents(s string) string { - t := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC) + t := transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC) result, _, _ := transform.String(t, string(s)) return result } -func isMn(r rune) bool { - return unicode.Is(unicode.Mn, r) // Mn: nonspacing marks -} - func (a Afero) FileContainsBytes(filename string, subslice []byte) (bool, error) { return FileContainsBytes(a.Fs, filename, subslice) } @@ -200,7 +197,6 @@ func FileContainsAnyBytes(fs Fs, filename string, subslices [][]byte) (bool, err // readerContains reports whether any of the subslices is within r. 
func readerContainsAny(r io.Reader, subslices ...[]byte) bool { - if r == nil || len(subslices) == 0 { return false } @@ -299,6 +295,9 @@ func IsEmpty(fs Fs, path string) (bool, error) { } defer f.Close() list, err := f.Readdir(-1) + if err != nil { + return false, err + } return len(list) == 0, nil } return fi.Size() == 0, nil diff --git a/go-controller/vendor/github.com/vishvananda/netns/.golangci.yml b/go-controller/vendor/github.com/vishvananda/netns/.golangci.yml new file mode 100644 index 0000000000..600bef78e2 --- /dev/null +++ b/go-controller/vendor/github.com/vishvananda/netns/.golangci.yml @@ -0,0 +1,2 @@ +run: + timeout: 5m diff --git a/go-controller/vendor/github.com/vishvananda/netns/README.md b/go-controller/vendor/github.com/vishvananda/netns/README.md index 1fdb2d3e4a..bdfedbe81f 100644 --- a/go-controller/vendor/github.com/vishvananda/netns/README.md +++ b/go-controller/vendor/github.com/vishvananda/netns/README.md @@ -23,6 +23,7 @@ import ( "fmt" "net" "runtime" + "github.com/vishvananda/netns" ) @@ -48,14 +49,3 @@ func main() { } ``` - -## NOTE - -The library can be safely used only with Go >= 1.10 due to [golang/go#20676](https://github.com/golang/go/issues/20676). - -After locking a goroutine to its current OS thread with `runtime.LockOSThread()` -and changing its network namespace, any new subsequent goroutine won't be -scheduled on that thread while it's locked. Therefore, the new goroutine -will run in a different namespace leading to unexpected results. - -See [here](https://www.weave.works/blog/linux-namespaces-golang-followup) for more details. diff --git a/go-controller/vendor/github.com/vishvananda/netns/doc.go b/go-controller/vendor/github.com/vishvananda/netns/doc.go new file mode 100644 index 0000000000..cd4093a4d7 --- /dev/null +++ b/go-controller/vendor/github.com/vishvananda/netns/doc.go @@ -0,0 +1,9 @@ +// Package netns allows ultra-simple network namespace handling. NsHandles +// can be retrieved and set. Note that the current namespace is thread +// local so actions that set and reset namespaces should use LockOSThread +// to make sure the namespace doesn't change due to a goroutine switch. +// It is best to close NsHandles when you are done with them. This can be +// accomplished via a `defer ns.Close()` on the handle. Changing namespaces +// requires elevated privileges, so in most cases this code needs to be run +// as root. +package netns diff --git a/go-controller/vendor/github.com/vishvananda/netns/netns_linux.go b/go-controller/vendor/github.com/vishvananda/netns/netns_linux.go index 36e64906b6..2ed7c7e2fa 100644 --- a/go-controller/vendor/github.com/vishvananda/netns/netns_linux.go +++ b/go-controller/vendor/github.com/vishvananda/netns/netns_linux.go @@ -1,33 +1,31 @@ -// +build linux,go1.10 - package netns import ( "fmt" - "io/ioutil" "os" "path" "path/filepath" "strconv" "strings" - "syscall" "golang.org/x/sys/unix" ) -// Deprecated: use syscall pkg instead (go >= 1.5 needed). +// Deprecated: use golang.org/x/sys/unix pkg instead. const ( - CLONE_NEWUTS = 0x04000000 /* New utsname group? */ - CLONE_NEWIPC = 0x08000000 /* New ipcs */ - CLONE_NEWUSER = 0x10000000 /* New user namespace */ - CLONE_NEWPID = 0x20000000 /* New pid namespace */ - CLONE_NEWNET = 0x40000000 /* New network namespace */ - CLONE_IO = 0x80000000 /* Get io context */ - bindMountPath = "/run/netns" /* Bind mount path for named netns */ + CLONE_NEWUTS = unix.CLONE_NEWUTS /* New utsname group? 
*/ + CLONE_NEWIPC = unix.CLONE_NEWIPC /* New ipcs */ + CLONE_NEWUSER = unix.CLONE_NEWUSER /* New user namespace */ + CLONE_NEWPID = unix.CLONE_NEWPID /* New pid namespace */ + CLONE_NEWNET = unix.CLONE_NEWNET /* New network namespace */ + CLONE_IO = unix.CLONE_IO /* Get io context */ ) -// Setns sets namespace using syscall. Note that this should be a method -// in syscall but it has not been added. +const bindMountPath = "/run/netns" /* Bind mount path for named netns */ + +// Setns sets namespace using golang.org/x/sys/unix.Setns. +// +// Deprecated: Use golang.org/x/sys/unix.Setns instead. func Setns(ns NsHandle, nstype int) (err error) { return unix.Setns(int(ns), nstype) } @@ -35,19 +33,20 @@ func Setns(ns NsHandle, nstype int) (err error) { // Set sets the current network namespace to the namespace represented // by NsHandle. func Set(ns NsHandle) (err error) { - return Setns(ns, CLONE_NEWNET) + return unix.Setns(int(ns), unix.CLONE_NEWNET) } // New creates a new network namespace, sets it as current and returns // a handle to it. func New() (ns NsHandle, err error) { - if err := unix.Unshare(CLONE_NEWNET); err != nil { + if err := unix.Unshare(unix.CLONE_NEWNET); err != nil { return -1, err } return Get() } -// NewNamed creates a new named network namespace and returns a handle to it +// NewNamed creates a new named network namespace, sets it as current, +// and returns a handle to it func NewNamed(name string) (NsHandle, error) { if _, err := os.Stat(bindMountPath); os.IsNotExist(err) { err = os.MkdirAll(bindMountPath, 0755) @@ -65,13 +64,15 @@ func NewNamed(name string) (NsHandle, error) { f, err := os.OpenFile(namedPath, os.O_CREATE|os.O_EXCL, 0444) if err != nil { + newNs.Close() return None(), err } f.Close() - nsPath := fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), syscall.Gettid()) - err = syscall.Mount(nsPath, namedPath, "bind", syscall.MS_BIND, "") + nsPath := fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), unix.Gettid()) + err = unix.Mount(nsPath, namedPath, "bind", unix.MS_BIND, "") if err != nil { + newNs.Close() return None(), err } @@ -82,7 +83,7 @@ func NewNamed(name string) (NsHandle, error) { func DeleteNamed(name string) error { namedPath := path.Join(bindMountPath, name) - err := syscall.Unmount(namedPath, syscall.MNT_DETACH) + err := unix.Unmount(namedPath, unix.MNT_DETACH) if err != nil { return err } @@ -108,7 +109,7 @@ func GetFromPath(path string) (NsHandle, error) { // GetFromName gets a handle to a named network namespace such as one // created by `ip netns add`. func GetFromName(name string) (NsHandle, error) { - return GetFromPath(fmt.Sprintf("/var/run/netns/%s", name)) + return GetFromPath(filepath.Join(bindMountPath, name)) } // GetFromPid gets a handle to the network namespace of a given pid. @@ -133,33 +134,38 @@ func GetFromDocker(id string) (NsHandle, error) { } // borrowed from docker/utils/utils.go -func findCgroupMountpoint(cgroupType string) (string, error) { - output, err := ioutil.ReadFile("/proc/mounts") +func findCgroupMountpoint(cgroupType string) (int, string, error) { + output, err := os.ReadFile("/proc/mounts") if err != nil { - return "", err + return -1, "", err } // /proc/mounts has 6 fields per line, one mount per line, e.g. 
// cgroup /sys/fs/cgroup/devices cgroup rw,relatime,devices 0 0 for _, line := range strings.Split(string(output), "\n") { parts := strings.Split(line, " ") - if len(parts) == 6 && parts[2] == "cgroup" { - for _, opt := range strings.Split(parts[3], ",") { - if opt == cgroupType { - return parts[1], nil + if len(parts) == 6 { + switch parts[2] { + case "cgroup2": + return 2, parts[1], nil + case "cgroup": + for _, opt := range strings.Split(parts[3], ",") { + if opt == cgroupType { + return 1, parts[1], nil + } } } } } - return "", fmt.Errorf("cgroup mountpoint not found for %s", cgroupType) + return -1, "", fmt.Errorf("cgroup mountpoint not found for %s", cgroupType) } // Returns the relative path to the cgroup docker is running in. // borrowed from docker/utils/utils.go // modified to get the docker pid instead of using /proc/self -func getThisCgroup(cgroupType string) (string, error) { - dockerpid, err := ioutil.ReadFile("/var/run/docker.pid") +func getDockerCgroup(cgroupVer int, cgroupType string) (string, error) { + dockerpid, err := os.ReadFile("/var/run/docker.pid") if err != nil { return "", err } @@ -171,14 +177,15 @@ func getThisCgroup(cgroupType string) (string, error) { if err != nil { return "", err } - output, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/cgroup", pid)) + output, err := os.ReadFile(fmt.Sprintf("/proc/%d/cgroup", pid)) if err != nil { return "", err } for _, line := range strings.Split(string(output), "\n") { parts := strings.Split(line, ":") // any type used by docker should work - if parts[1] == cgroupType { + if (cgroupVer == 1 && parts[1] == cgroupType) || + (cgroupVer == 2 && parts[1] == "") { return parts[2], nil } } @@ -190,46 +197,56 @@ func getThisCgroup(cgroupType string) (string, error) { // modified to only return the first pid // modified to glob with id // modified to search for newer docker containers +// modified to look for cgroups v2 func getPidForContainer(id string) (int, error) { pid := 0 // memory is chosen randomly, any cgroup used by docker works cgroupType := "memory" - cgroupRoot, err := findCgroupMountpoint(cgroupType) + cgroupVer, cgroupRoot, err := findCgroupMountpoint(cgroupType) if err != nil { return pid, err } - cgroupThis, err := getThisCgroup(cgroupType) + cgroupDocker, err := getDockerCgroup(cgroupVer, cgroupType) if err != nil { return pid, err } id += "*" + var pidFile string + if cgroupVer == 1 { + pidFile = "tasks" + } else if cgroupVer == 2 { + pidFile = "cgroup.procs" + } else { + return -1, fmt.Errorf("Invalid cgroup version '%d'", cgroupVer) + } + attempts := []string{ - filepath.Join(cgroupRoot, cgroupThis, id, "tasks"), + filepath.Join(cgroupRoot, cgroupDocker, id, pidFile), // With more recent lxc versions use, cgroup will be in lxc/ - filepath.Join(cgroupRoot, cgroupThis, "lxc", id, "tasks"), + filepath.Join(cgroupRoot, cgroupDocker, "lxc", id, pidFile), // With more recent docker, cgroup will be in docker/ - filepath.Join(cgroupRoot, cgroupThis, "docker", id, "tasks"), + filepath.Join(cgroupRoot, cgroupDocker, "docker", id, pidFile), // Even more recent docker versions under systemd use docker-.scope/ - filepath.Join(cgroupRoot, "system.slice", "docker-"+id+".scope", "tasks"), + filepath.Join(cgroupRoot, "system.slice", "docker-"+id+".scope", pidFile), // Even more recent docker versions under cgroup/systemd/docker// - filepath.Join(cgroupRoot, "..", "systemd", "docker", id, "tasks"), + filepath.Join(cgroupRoot, "..", "systemd", "docker", id, pidFile), // Kubernetes with docker and CNI is even more different. 
Works for BestEffort and Burstable QoS - filepath.Join(cgroupRoot, "..", "systemd", "kubepods", "*", "pod*", id, "tasks"), + filepath.Join(cgroupRoot, "..", "systemd", "kubepods", "*", "pod*", id, pidFile), // Same as above but for Guaranteed QoS - filepath.Join(cgroupRoot, "..", "systemd", "kubepods", "pod*", id, "tasks"), + filepath.Join(cgroupRoot, "..", "systemd", "kubepods", "pod*", id, pidFile), // Another flavor of containers location in recent kubernetes 1.11+. Works for BestEffort and Burstable QoS - filepath.Join(cgroupRoot, cgroupThis, "kubepods.slice", "*.slice", "*", "docker-"+id+".scope", "tasks"), + filepath.Join(cgroupRoot, cgroupDocker, "kubepods.slice", "*.slice", "*", "docker-"+id+".scope", pidFile), // Same as above but for Guaranteed QoS - filepath.Join(cgroupRoot, cgroupThis, "kubepods.slice", "*", "docker-"+id+".scope", "tasks"), + filepath.Join(cgroupRoot, cgroupDocker, "kubepods.slice", "*", "docker-"+id+".scope", pidFile), // When runs inside of a container with recent kubernetes 1.11+. Works for BestEffort and Burstable QoS - filepath.Join(cgroupRoot, "kubepods.slice", "*.slice", "*", "docker-"+id+".scope", "tasks"), + filepath.Join(cgroupRoot, "kubepods.slice", "*.slice", "*", "docker-"+id+".scope", pidFile), // Same as above but for Guaranteed QoS - filepath.Join(cgroupRoot, "kubepods.slice", "*", "docker-"+id+".scope", "tasks"), + filepath.Join(cgroupRoot, "kubepods.slice", "*", "docker-"+id+".scope", pidFile), } var filename string @@ -247,7 +264,7 @@ func getPidForContainer(id string) (int, error) { return pid, fmt.Errorf("Unable to find container: %v", id[:len(id)-1]) } - output, err := ioutil.ReadFile(filename) + output, err := os.ReadFile(filename) if err != nil { return pid, err } diff --git a/go-controller/vendor/github.com/vishvananda/netns/netns_unspecified.go b/go-controller/vendor/github.com/vishvananda/netns/netns_others.go similarity index 63% rename from go-controller/vendor/github.com/vishvananda/netns/netns_unspecified.go rename to go-controller/vendor/github.com/vishvananda/netns/netns_others.go index d06af62b68..0489837741 100644 --- a/go-controller/vendor/github.com/vishvananda/netns/netns_unspecified.go +++ b/go-controller/vendor/github.com/vishvananda/netns/netns_others.go @@ -1,3 +1,4 @@ +//go:build !linux // +build !linux package netns @@ -10,6 +11,14 @@ var ( ErrNotImplemented = errors.New("not implemented") ) +// Setns sets namespace using golang.org/x/sys/unix.Setns on Linux. It +// is not implemented on other platforms. +// +// Deprecated: Use golang.org/x/sys/unix.Setns instead. 
+func Setns(ns NsHandle, nstype int) (err error) { + return ErrNotImplemented +} + func Set(ns NsHandle) (err error) { return ErrNotImplemented } @@ -18,6 +27,14 @@ func New() (ns NsHandle, err error) { return -1, ErrNotImplemented } +func NewNamed(name string) (NsHandle, error) { + return -1, ErrNotImplemented +} + +func DeleteNamed(name string) error { + return ErrNotImplemented +} + func Get() (NsHandle, error) { return -1, ErrNotImplemented } diff --git a/go-controller/vendor/github.com/vishvananda/netns/netns.go b/go-controller/vendor/github.com/vishvananda/netns/nshandle_linux.go similarity index 75% rename from go-controller/vendor/github.com/vishvananda/netns/netns.go rename to go-controller/vendor/github.com/vishvananda/netns/nshandle_linux.go index 116befd548..1baffb66ac 100644 --- a/go-controller/vendor/github.com/vishvananda/netns/netns.go +++ b/go-controller/vendor/github.com/vishvananda/netns/nshandle_linux.go @@ -1,11 +1,3 @@ -// Package netns allows ultra-simple network namespace handling. NsHandles -// can be retrieved and set. Note that the current namespace is thread -// local so actions that set and reset namespaces should use LockOSThread -// to make sure the namespace doesn't change due to a goroutine switch. -// It is best to close NsHandles when you are done with them. This can be -// accomplished via a `defer ns.Close()` on the handle. Changing namespaces -// requires elevated privileges, so in most cases this code needs to be run -// as root. package netns import ( @@ -38,7 +30,7 @@ func (ns NsHandle) Equal(other NsHandle) bool { // String shows the file descriptor number and its dev and inode. func (ns NsHandle) String() string { if ns == -1 { - return "NS(None)" + return "NS(none)" } var s unix.Stat_t if err := unix.Fstat(int(ns), &s); err != nil { @@ -71,7 +63,7 @@ func (ns *NsHandle) Close() error { if err := unix.Close(int(*ns)); err != nil { return err } - (*ns) = -1 + *ns = -1 return nil } diff --git a/go-controller/vendor/github.com/vishvananda/netns/nshandle_others.go b/go-controller/vendor/github.com/vishvananda/netns/nshandle_others.go new file mode 100644 index 0000000000..af727bc091 --- /dev/null +++ b/go-controller/vendor/github.com/vishvananda/netns/nshandle_others.go @@ -0,0 +1,45 @@ +//go:build !linux +// +build !linux + +package netns + +// NsHandle is a handle to a network namespace. It can only be used on Linux, +// but provides stub methods on other platforms. +type NsHandle int + +// Equal determines if two network handles refer to the same network +// namespace. It is only implemented on Linux. +func (ns NsHandle) Equal(_ NsHandle) bool { + return false +} + +// String shows the file descriptor number and its dev and inode. +// It is only implemented on Linux, and returns "NS(none)" on other +// platforms. +func (ns NsHandle) String() string { + return "NS(none)" +} + +// UniqueId returns a string which uniquely identifies the namespace +// associated with the network handle. It is only implemented on Linux, +// and returns "NS(none)" on other platforms. +func (ns NsHandle) UniqueId() string { + return "NS(none)" +} + +// IsOpen returns true if Close() has not been called. It is only implemented +// on Linux and always returns false on other platforms. +func (ns NsHandle) IsOpen() bool { + return false +} + +// Close closes the NsHandle and resets its file descriptor to -1. +// It is only implemented on Linux. +func (ns *NsHandle) Close() error { + return nil +} + +// None gets an empty (closed) NsHandle. 
+func None() NsHandle { + return NsHandle(-1) +} diff --git a/go-controller/vendor/golang.org/x/text/encoding/internal/internal.go b/go-controller/vendor/golang.org/x/text/encoding/internal/internal.go index 75a5fd1658..413e6fc6d7 100644 --- a/go-controller/vendor/golang.org/x/text/encoding/internal/internal.go +++ b/go-controller/vendor/golang.org/x/text/encoding/internal/internal.go @@ -64,7 +64,7 @@ func (e FuncEncoding) NewEncoder() *encoding.Encoder { // byte. type RepertoireError byte -// Error implements the error interrface. +// Error implements the error interface. func (r RepertoireError) Error() string { return "encoding: rune not supported by encoding." } diff --git a/go-controller/vendor/golang.org/x/text/unicode/norm/forminfo.go b/go-controller/vendor/golang.org/x/text/unicode/norm/forminfo.go index d69ccb4f97..487335d14d 100644 --- a/go-controller/vendor/golang.org/x/text/unicode/norm/forminfo.go +++ b/go-controller/vendor/golang.org/x/text/unicode/norm/forminfo.go @@ -13,7 +13,7 @@ import "encoding/binary" // a rune to a uint16. The values take two forms. For v >= 0x8000: // bits // 15: 1 (inverse of NFD_QC bit of qcInfo) -// 13..7: qcInfo (see below). isYesD is always true (no decompostion). +// 13..7: qcInfo (see below). isYesD is always true (no decomposition). // 6..0: ccc (compressed CCC value). // For v < 0x8000, the respective rune has a decomposition and v is an index // into a byte array of UTF-8 decomposition sequences and additional info and diff --git a/go-controller/vendor/modules.txt b/go-controller/vendor/modules.txt index b54fa603c2..c1936aefa0 100644 --- a/go-controller/vendor/modules.txt +++ b/go-controller/vendor/modules.txt @@ -1,8 +1,3 @@ -# github.com/Mellanox/sriovnet v1.1.0 -## explicit; go 1.13 -github.com/Mellanox/sriovnet -github.com/Mellanox/sriovnet/pkg/utils/filesystem -github.com/Mellanox/sriovnet/pkg/utils/netlinkops # github.com/Microsoft/go-winio v0.5.2 ## explicit; go 1.13 github.com/Microsoft/go-winio @@ -149,7 +144,7 @@ github.com/google/go-cmp/cmp/internal/value ## explicit; go 1.12 github.com/google/gofuzz github.com/google/gofuzz/bytesource -# github.com/google/uuid v1.2.0 +# github.com/google/uuid v1.3.0 ## explicit github.com/google/uuid # github.com/gorilla/mux v1.8.0 @@ -202,6 +197,11 @@ github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/ github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/informers/externalversions/k8s.cni.cncf.io/v1 github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/listers/k8s.cni.cncf.io/v1 github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/utils +# github.com/k8snetworkplumbingwg/sriovnet v1.2.1-0.20230427090635-4929697df2dc +## explicit; go 1.18 +github.com/k8snetworkplumbingwg/sriovnet +github.com/k8snetworkplumbingwg/sriovnet/pkg/utils/filesystem +github.com/k8snetworkplumbingwg/sriovnet/pkg/utils/netlinkops # github.com/mailru/easyjson v0.7.6 ## explicit; go 1.12 github.com/mailru/easyjson/buffer @@ -336,9 +336,10 @@ github.com/safchain/ethtool # github.com/sirupsen/logrus v1.9.0 ## explicit; go 1.13 github.com/sirupsen/logrus -# github.com/spf13/afero v1.6.0 -## explicit; go 1.13 +# github.com/spf13/afero v1.9.5 +## explicit; go 1.16 github.com/spf13/afero +github.com/spf13/afero/internal/common github.com/spf13/afero/mem # github.com/spf13/pflag v1.0.5 ## explicit; go 1.12 @@ -353,12 +354,12 @@ github.com/stretchr/testify/mock # github.com/urfave/cli/v2 v2.2.0 ## explicit; go 1.11 
github.com/urfave/cli/v2 -# github.com/vishvananda/netlink v1.2.1-beta.2.0.20230206183746-70ca0345eede => github.com/jcaamano/netlink v1.1.1-0.20220831114501-3a761ed61db6 +# github.com/vishvananda/netlink v1.2.1-beta.2.0.20230420174744-55c8b9515a01 => github.com/jcaamano/netlink v1.1.1-0.20220831114501-3a761ed61db6 ## explicit; go 1.12 github.com/vishvananda/netlink github.com/vishvananda/netlink/nl -# github.com/vishvananda/netns v0.0.0-20210104183010-2eb08e3e575f -## explicit; go 1.12 +# github.com/vishvananda/netns v0.0.4 +## explicit; go 1.17 github.com/vishvananda/netns # go.opencensus.io v0.23.0 ## explicit; go 1.13 @@ -408,7 +409,7 @@ golang.org/x/sys/windows/svc # golang.org/x/term v0.5.0 ## explicit; go 1.17 golang.org/x/term -# golang.org/x/text v0.7.0 +# golang.org/x/text v0.9.0 ## explicit; go 1.17 golang.org/x/text/encoding golang.org/x/text/encoding/charmap From 0cf859598c02c66f9a66808285d129b730dd6587 Mon Sep 17 00:00:00 2001 From: Balazs Nemeth Date: Fri, 14 Apr 2023 10:19:44 +0200 Subject: [PATCH 14/90] Use new sriovnet.GetPfIndexByVfPciAddress function As part of this change, also use mockery to regenerate the mocked version of the SriovnetOps using the following command after changing directory to ./go-controller/pkg/util/ (future reference): ~/go/bin/mockery --name=SriovnetOps Signed-off-by: Balazs Nemeth --- go-controller/pkg/cni/cni_dpu.go | 18 +++------------- go-controller/pkg/cni/cni_dpu_test.go | 16 ++++++++------ go-controller/pkg/util/mocks/SriovnetOps.go | 24 +++++++++++++++++++++ go-controller/pkg/util/sriovnet_linux.go | 5 +++++ 4 files changed, 41 insertions(+), 22 deletions(-) diff --git a/go-controller/pkg/cni/cni_dpu.go b/go-controller/pkg/cni/cni_dpu.go index e1c9664686..85fcf34464 100644 --- a/go-controller/pkg/cni/cni_dpu.go +++ b/go-controller/pkg/cni/cni_dpu.go @@ -38,34 +38,22 @@ func (pr *PodRequest) updatePodDPUConnDetailsWithRetry(kube kube.Interface, podL } func (pr *PodRequest) addDPUConnectionDetailsAnnot(k kube.Interface, podLister corev1listers.PodLister, vfNetdevName string) error { - // 1. Verify there is a device id if pr.CNIConf.DeviceID == "" { return fmt.Errorf("DeviceID must be set for Pod request with DPU") } pciAddress := pr.CNIConf.DeviceID - // 2. Get the PF index and VF index - pfPciAddress, err := util.GetSriovnetOps().GetPfPciFromVfPci(pciAddress) - if err != nil { - return err - } vfindex, err := util.GetSriovnetOps().GetVfIndexByPciAddress(pciAddress) if err != nil { return err } - - // 3. Set dpu connection-details pod annotation - var domain, bus, dev, fn int - parsed, err := fmt.Sscanf(pfPciAddress, "%04x:%02x:%02x.%d", &domain, &bus, &dev, &fn) + pfindex, err := util.GetSriovnetOps().GetPfIndexByVfPciAddress(pciAddress) if err != nil { - return fmt.Errorf("error trying to parse PF PCI address %s: %v", pfPciAddress, err) - } - if parsed != 4 { - return fmt.Errorf("failed to parse PF PCI address %s. 
Unexpected format", pfPciAddress) + return err } dpuConnDetails := util.DPUConnectionDetails{ - PfId: fmt.Sprint(fn), + PfId: fmt.Sprint(pfindex), VfId: fmt.Sprint(vfindex), SandboxId: pr.SandboxID, VfNetdevName: vfNetdevName, diff --git a/go-controller/pkg/cni/cni_dpu_test.go b/go-controller/pkg/cni/cni_dpu_test.go index 5fa496bd2c..da2853621a 100644 --- a/go-controller/pkg/cni/cni_dpu_test.go +++ b/go-controller/pkg/cni/cni_dpu_test.go @@ -58,8 +58,8 @@ var _ = Describe("cni_dpu tests", func() { It("Sets dpu.connection-details pod annotation", func() { var err error pr.CNIConf.DeviceID = "0000:05:00.4" - fakeSriovnetOps.On("GetPfPciFromVfPci", pr.CNIConf.DeviceID).Return("0000:05:00.0", nil) fakeSriovnetOps.On("GetVfIndexByPciAddress", pr.CNIConf.DeviceID).Return(2, nil) + fakeSriovnetOps.On("GetPfIndexByVfPciAddress", pr.CNIConf.DeviceID).Return(0, nil) dpuCd := util.DPUConnectionDetails{ PfId: "0", VfId: "2", @@ -81,27 +81,29 @@ var _ = Describe("cni_dpu tests", func() { Expect(err).To(HaveOccurred()) }) - It("Fails if srionvet fails to get PF PCI from VF PCI", func() { + It("Fails if srionvet fails to get PF Index from VF PCI", func() { pr.CNIConf.DeviceID = "0000:05:00.4" - fakeSriovnetOps.On("GetPfPciFromVfPci", pr.CNIConf.DeviceID).Return( - "", fmt.Errorf("failed to get PF address")) + fakeSriovnetOps.On("GetVfIndexByPciAddress", pr.CNIConf.DeviceID).Return(2, nil) + fakeSriovnetOps.On("GetPfIndexByVfPciAddress", pr.CNIConf.DeviceID).Return( + -1, fmt.Errorf("failed to get PF Index")) err := pr.addDPUConnectionDetailsAnnot(&fakeKubeInterface, &podLister, "") Expect(err).To(HaveOccurred()) }) It("Fails if srionvet fails to get VF index from PF PCI address", func() { pr.CNIConf.DeviceID = "0000:05:00.4" - fakeSriovnetOps.On("GetPfPciFromVfPci", pr.CNIConf.DeviceID).Return("0000:05:00.0", nil) fakeSriovnetOps.On("GetVfIndexByPciAddress", pr.CNIConf.DeviceID).Return( -1, fmt.Errorf("failed to get VF index")) + fakeSriovnetOps.On("GetPfIndexByVfPciAddress", pr.CNIConf.DeviceID).Return(0, nil) err := pr.addDPUConnectionDetailsAnnot(&fakeKubeInterface, &podLister, "") Expect(err).To(HaveOccurred()) }) It("Fails if PF PCI address fails to parse", func() { pr.CNIConf.DeviceID = "0000:05:00.4" - fakeSriovnetOps.On("GetPfPciFromVfPci", pr.CNIConf.DeviceID).Return("05:00.0", nil) fakeSriovnetOps.On("GetVfIndexByPciAddress", pr.CNIConf.DeviceID).Return(2, nil) + fakeSriovnetOps.On("GetPfIndexByVfPciAddress", pr.CNIConf.DeviceID).Return( + -1, fmt.Errorf("failed to parse PF PCI address")) err := pr.addDPUConnectionDetailsAnnot(&fakeKubeInterface, &podLister, "") Expect(err).To(HaveOccurred()) }) @@ -110,8 +112,8 @@ var _ = Describe("cni_dpu tests", func() { var err error pod.Annotations = map[string]string{} pr.CNIConf.DeviceID = "0000:05:00.4" - fakeSriovnetOps.On("GetPfPciFromVfPci", pr.CNIConf.DeviceID).Return("0000:05:00.0", nil) fakeSriovnetOps.On("GetVfIndexByPciAddress", pr.CNIConf.DeviceID).Return(2, nil) + fakeSriovnetOps.On("GetPfIndexByVfPciAddress", pr.CNIConf.DeviceID).Return(0, nil) dpuCd := util.DPUConnectionDetails{ PfId: "0", VfId: "2", diff --git a/go-controller/pkg/util/mocks/SriovnetOps.go b/go-controller/pkg/util/mocks/SriovnetOps.go index a4635270f0..c403ded99c 100644 --- a/go-controller/pkg/util/mocks/SriovnetOps.go +++ b/go-controller/pkg/util/mocks/SriovnetOps.go @@ -67,6 +67,30 @@ func (_m *SriovnetOps) GetNetDevicesFromPci(pciAddress string) ([]string, error) return r0, r1 } +// GetPfIndexByVfPciAddress provides a mock function with given fields: vfPciAddress +func 
(_m *SriovnetOps) GetPfIndexByVfPciAddress(vfPciAddress string) (int, error) { + ret := _m.Called(vfPciAddress) + + var r0 int + var r1 error + if rf, ok := ret.Get(0).(func(string) (int, error)); ok { + return rf(vfPciAddress) + } + if rf, ok := ret.Get(0).(func(string) int); ok { + r0 = rf(vfPciAddress) + } else { + r0 = ret.Get(0).(int) + } + + if rf, ok := ret.Get(1).(func(string) error); ok { + r1 = rf(vfPciAddress) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + // GetPfPciFromAux provides a mock function with given fields: auxDev func (_m *SriovnetOps) GetPfPciFromAux(auxDev string) (string, error) { ret := _m.Called(auxDev) diff --git a/go-controller/pkg/util/sriovnet_linux.go b/go-controller/pkg/util/sriovnet_linux.go index 3300eecb8b..df8420d14b 100644 --- a/go-controller/pkg/util/sriovnet_linux.go +++ b/go-controller/pkg/util/sriovnet_linux.go @@ -17,6 +17,7 @@ type SriovnetOps interface { GetUplinkRepresentor(vfPciAddress string) (string, error) GetUplinkRepresentorFromAux(auxDev string) (string, error) GetVfIndexByPciAddress(vfPciAddress string) (int, error) + GetPfIndexByVfPciAddress(vfPciAddress string) (int, error) GetSfIndexByAuxDev(auxDev string) (int, error) GetVfRepresentor(uplink string, vfIndex int) (string, error) GetSfRepresentor(uplink string, sfIndex int) (string, error) @@ -62,6 +63,10 @@ func (defaultSriovnetOps) GetVfIndexByPciAddress(vfPciAddress string) (int, erro return sriovnet.GetVfIndexByPciAddress(vfPciAddress) } +func (defaultSriovnetOps) GetPfIndexByVfPciAddress(vfPciAddress string) (int, error) { + return sriovnet.GetPfIndexByVfPciAddress(vfPciAddress) +} + func (defaultSriovnetOps) GetSfIndexByAuxDev(auxDev string) (int, error) { return sriovnet.GetSfIndexByAuxDev(auxDev) } From 3d63f1da27cb75ddf8f13668583cbd98b0962806 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Sat, 29 Apr 2023 11:17:04 +0200 Subject: [PATCH 15/90] Avoid external connectivity flakes in E2E When asserting external connectivity, it happens to consistently receive a 429 (Too Many Requests) status code from the sample website. As the test only verifies the presence of an external connection, even an error status code is a good condition to continue testing. Signed-off-by: Andrea Panattoni --- test/e2e/e2e.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/test/e2e/e2e.go b/test/e2e/e2e.go index 684a0ef492..dd64a77ae3 100644 --- a/test/e2e/e2e.go +++ b/test/e2e/e2e.go @@ -511,13 +511,10 @@ var _ = ginkgo.Describe("e2e control plane", func() { // Since this is not really a test of kubernetes in any way, we // leave it as a pre-test assertion, rather than a Ginko test. 
ginkgo.By("Executing a successful http request from the external internet") - resp, err := http.Get("http://google.com") + _, err := http.Get("http://google.com") if err != nil { framework.Failf("Unable to connect/talk to the internet: %v", err) } - if resp.StatusCode != http.StatusOK { - framework.Failf("Unexpected error code, expected 200, got, %v (%v)", resp.StatusCode, resp) - } masterPods, err := f.ClientSet.CoreV1().Pods("ovn-kubernetes").List(context.Background(), metav1.ListOptions{ LabelSelector: "name=ovnkube-master", From fd6dcc96232ad467d7669b2892e53efd8f908532 Mon Sep 17 00:00:00 2001 From: Yun Zhou Date: Mon, 1 May 2023 17:06:53 -0700 Subject: [PATCH 16/90] cmdDel should return if connection details annotation does not exist Signed-off-by: Yun Zhou --- go-controller/pkg/cni/cni.go | 1 + 1 file changed, 1 insertion(+) diff --git a/go-controller/pkg/cni/cni.go b/go-controller/pkg/cni/cni.go index 7000d3286f..eef9acdbd7 100644 --- a/go-controller/pkg/cni/cni.go +++ b/go-controller/pkg/cni/cni.go @@ -179,6 +179,7 @@ func (pr *PodRequest) cmdDel(clientset *ClientSet) (*Response, error) { if err != nil { klog.Warningf("Failed to get DPU connection details annotation for pod %s/%s NAD %s: %v", pr.PodNamespace, pr.PodName, pr.nadName, err) + return response, nil } // check if this cmdDel is meant for the current sandbox, if not, directly return From e90a1038f59c04c8fa3390390fb44d10d6df46df Mon Sep 17 00:00:00 2001 From: William Zhao Date: Fri, 28 Apr 2023 20:09:28 -0400 Subject: [PATCH 17/90] Fix minor typo in error string Signed-off-by: William Zhao --- go-controller/pkg/node/default_node_network_controller.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go-controller/pkg/node/default_node_network_controller.go b/go-controller/pkg/node/default_node_network_controller.go index bcf59342af..85572a0f4b 100644 --- a/go-controller/pkg/node/default_node_network_controller.go +++ b/go-controller/pkg/node/default_node_network_controller.go @@ -536,7 +536,7 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error { } nodeAddr := net.ParseIP(nodeAddrStr) if nodeAddr == nil { - return fmt.Errorf("failed to parse kubernetes node IP address. %v", err) + return fmt.Errorf("failed to parse kubernetes node IP address. %v", nodeAddrStr) } // Make sure that the node zone matches with the Southbound db zone. From 271b1902de81a6fa679b0968da8345a96830eee7 Mon Sep 17 00:00:00 2001 From: William Zhao Date: Fri, 28 Apr 2023 20:09:56 -0400 Subject: [PATCH 18/90] Fix handling of DPU on node address change/setting The DPU needs to use the DPU host's IP address (the tenant cluster's host internal IP address) instead of the DPU's external bridge IP address for the node primary address annotations and the L3 gateway configuration. An incorrect node primary address annotation and L3 gateway configuration results in broken flows when using NodePort or ClusterIP service. 
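To illustrate the intent of this change with a minimal, self-contained sketch (not part of the patch itself; the names selectNodeAddresses, hostPrimaryIP, and the sample addresses are hypothetical), the idea is that in DPU mode the addresses advertised for the node are built from the DPU host's primary IP rather than from the IPs found on the DPU's external bridge:

package main

import (
	"fmt"
	"net"
)

// selectNodeAddresses picks the addresses to advertise for the node. In DPU
// mode the DPU host's primary IP replaces the bridge IPs while the discovered
// prefix lengths are kept; otherwise the bridge addresses are used as-is.
func selectNodeAddresses(isDPU bool, hostPrimaryIP string, bridgeAddrs []*net.IPNet) ([]*net.IPNet, error) {
	if !isDPU {
		return bridgeAddrs, nil
	}
	ip := net.ParseIP(hostPrimaryIP)
	if ip == nil {
		return nil, fmt.Errorf("failed to parse node IP address %q", hostPrimaryIP)
	}
	out := make([]*net.IPNet, 0, len(bridgeAddrs))
	for _, addr := range bridgeAddrs {
		out = append(out, &net.IPNet{IP: ip, Mask: addr.Mask})
	}
	return out, nil
}

func main() {
	// Address seen on the DPU's external bridge vs. the tenant host's IP.
	_, bridgeNet, _ := net.ParseCIDR("172.18.0.10/24")
	addrs, _ := selectNodeAddresses(true, "192.168.100.5", []*net.IPNet{bridgeNet})
	fmt.Println(addrs[0]) // prints 192.168.100.5/24
}

The actual change below achieves this by passing the Node object into updateInterfaceIPAddresses and resolving the host IP with util.GetNodePrimaryIP.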
Signed-off-by: William Zhao --- go-controller/pkg/node/gateway.go | 27 ++++++++++++++++--- .../pkg/node/node_ip_handler_linux.go | 2 +- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/go-controller/pkg/node/gateway.go b/go-controller/pkg/node/gateway.go index 5a9f1e2913..cc0184bb75 100644 --- a/go-controller/pkg/node/gateway.go +++ b/go-controller/pkg/node/gateway.go @@ -373,14 +373,33 @@ type bridgeConfiguration struct { } // updateInterfaceIPAddresses sets and returns the bridge's current ips -func (b *bridgeConfiguration) updateInterfaceIPAddresses() ([]*net.IPNet, error) { +func (b *bridgeConfiguration) updateInterfaceIPAddresses(node *kapi.Node) ([]*net.IPNet, error) { b.Lock() defer b.Unlock() ifAddrs, err := getNetworkInterfaceIPAddresses(b.bridgeName) - if err == nil { - b.ips = ifAddrs + if err != nil { + return nil, err + } + + // For DPU, here we need to use the DPU host's IP address which is the tenant cluster's + // host internal IP address instead of the DPU's external bridge IP address. + if config.OvnKubeNode.Mode == types.NodeModeDPU { + nodeAddrStr, err := util.GetNodePrimaryIP(node) + if err != nil { + return nil, err + } + nodeAddr := net.ParseIP(nodeAddrStr) + if nodeAddr == nil { + return nil, fmt.Errorf("failed to parse node IP address. %v", nodeAddrStr) + } + ifAddrs, err = getDPUHostPrimaryIPAddresses(nodeAddr, ifAddrs) + if err != nil { + return nil, err + } } - return ifAddrs, err + + b.ips = ifAddrs + return ifAddrs, nil } func bridgeForInterface(intfName, nodeName, physicalNetworkName string, gwIPs []*net.IPNet) (*bridgeConfiguration, error) { diff --git a/go-controller/pkg/node/node_ip_handler_linux.go b/go-controller/pkg/node/node_ip_handler_linux.go index fa5d645f49..16243f6d90 100644 --- a/go-controller/pkg/node/node_ip_handler_linux.go +++ b/go-controller/pkg/node/node_ip_handler_linux.go @@ -227,7 +227,7 @@ func (c *addressManager) updateNodeAddressAnnotations() error { if c.useNetlink { // get updated interface IP addresses for the gateway bridge - ifAddrs, err = c.gatewayBridge.updateInterfaceIPAddresses() + ifAddrs, err = c.gatewayBridge.updateInterfaceIPAddresses(node) if err != nil { return err } From 7da19a58f38df38ae0d80d386c512f874d08312a Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Wed, 3 May 2023 11:54:07 -0400 Subject: [PATCH 19/90] Enable github actions merge group This will help resolve the issue of merging a PR then requiring another PR to be rebased to ensure we test against new HEAD. PRs will be sent to a merge queue and then executed against HEAD before they are merged. Once this commit is merged we can go enable requiring merge queue in the repo. 
Signed-off-by: Tim Rozet --- .github/workflows/test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0d2d1f227a..e5634d1ed7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,6 +1,7 @@ name: ovn-ci on: + merge_group: pull_request: branches: [ master ] schedule: From 314959ca51c3980e87cc28473469d7329c36689e Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Thu, 4 May 2023 11:24:23 -0400 Subject: [PATCH 20/90] Adds code owners Signed-off-by: Tim Rozet --- CODEOWNERS | 1 + 1 file changed, 1 insertion(+) create mode 100644 CODEOWNERS diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 0000000000..185b6f2a7a --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1 @@ +* @trozet @dcbw @girishmg @jcaamano From 0df955bd9f11aee1a2b28a90e53deee5218aeb89 Mon Sep 17 00:00:00 2001 From: Periyasamy Palanisamy Date: Wed, 26 Apr 2023 08:56:10 +0200 Subject: [PATCH 21/90] Adjust python ssl library dependency check The ssl module is already included in recent Python versions, so there is no need to check for the pyOpenSSL dependency explicitly in such environments. Adjust this check so that it works on different platforms. Signed-off-by: Periyasamy Palanisamy --- go-controller/cmd/ovnkube-trace/ovnkube-trace.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go-controller/cmd/ovnkube-trace/ovnkube-trace.go b/go-controller/cmd/ovnkube-trace/ovnkube-trace.go index a9476d19af..e30cc0db02 100644 --- a/go-controller/cmd/ovnkube-trace/ovnkube-trace.go +++ b/go-controller/cmd/ovnkube-trace/ovnkube-trace.go @@ -887,7 +887,7 @@ func getOfprotoIPFamilyArgs(protocol string, ip net.IP) (string, string, string) func installOvnDetraceDependencies(coreclient *corev1client.CoreV1Client, restconfig *rest.Config, podName, ovnNamespace string) error { dependencies := map[string]string{ "ovs": "if type -p ovn-detrace >/dev/null 2>&1; then echo 'true' ; fi", - "pyOpenSSL": "if rpm -qa | egrep -q python3-pyOpenSSL; then echo 'true'; fi", + "pyOpenSSL": "if python -c 'import ssl; print(ssl.OPENSSL_VERSION)' > /dev/null; then echo 'true'; fi", } for dependency, dependencyCmd := range dependencies { depVerifyOut, depVerifyErr, err := execInPod(coreclient, restconfig, ovnNamespace, podName, "ovnkube-node", dependencyCmd, "") From 3405c11d409691159e8bdfc1f79e9e57e88eb7c3 Mon Sep 17 00:00:00 2001 From: Periyasamy Palanisamy Date: Wed, 26 Apr 2023 15:07:08 +0200 Subject: [PATCH 22/90] Provide dependency check for python3-pip There is an unknown scenario in which the python ovn and ssl packages get installed using the pip3 command on a running ovnkube-node pod container. But currently there is no check validating whether python3-pip itself is installed in the container. Hence this adds the appropriate check and returns an error when pip3 is not already installed.
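As a rough, self-contained sketch of the flow this patch introduces (the helper names runInContainer and ensureDependency are hypothetical stand-ins for execInPod and the new verifyDependency logic): verify a dependency, and when it is missing confirm that pip3 is present before attempting "pip3 install":

package main

import (
	"fmt"
	"os/exec"
	"strings"
)

// runInContainer stands in for execInPod in ovnkube-trace; here it just runs
// the command locally so the sketch stays self-contained.
func runInContainer(cmd string) (string, error) {
	out, err := exec.Command("bash", "-c", cmd).Output()
	return strings.TrimSpace(string(out)), err
}

// ensureDependency verifies a dependency and, if it is missing, checks that
// pip3 exists before falling back to "pip3 install".
func ensureDependency(name, checkCmd string) error {
	out, err := runInContainer(checkCmd)
	if err != nil {
		return fmt.Errorf("failed to verify dependency %s: %v", name, err)
	}
	if out == "true" {
		return nil // dependency already present
	}
	out, err = runInContainer("if type -p pip3 >/dev/null 2>&1; then echo 'true'; fi")
	if err != nil || out != "true" {
		return fmt.Errorf("pip3 is not installed, cannot install dependency %s", name)
	}
	_, err = runInContainer("pip3 install " + name)
	return err
}

func main() {
	err := ensureDependency("pyOpenSSL",
		"if python -c 'import ssl' >/dev/null 2>&1; then echo 'true'; fi")
	fmt.Println("result:", err)
}

In the real code the commands are executed inside the ovnkube-node container via execInPod rather than locally, as the diff below shows.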
Signed-off-by: Periyasamy Palanisamy --- .../cmd/ovnkube-trace/ovnkube-trace.go | 33 ++++++++++++++----- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/go-controller/cmd/ovnkube-trace/ovnkube-trace.go b/go-controller/cmd/ovnkube-trace/ovnkube-trace.go index e30cc0db02..1bf67afb41 100644 --- a/go-controller/cmd/ovnkube-trace/ovnkube-trace.go +++ b/go-controller/cmd/ovnkube-trace/ovnkube-trace.go @@ -890,19 +890,25 @@ func installOvnDetraceDependencies(coreclient *corev1client.CoreV1Client, restco "pyOpenSSL": "if python -c 'import ssl; print(ssl.OPENSSL_VERSION)' > /dev/null; then echo 'true'; fi", } for dependency, dependencyCmd := range dependencies { - depVerifyOut, depVerifyErr, err := execInPod(coreclient, restconfig, ovnNamespace, podName, "ovnkube-node", dependencyCmd, "") + verifyOut, _, err := verifyDependency(coreclient, restconfig, podName, ovnNamespace, dependency, dependencyCmd) if err != nil { - return fmt.Errorf("dependency verification error in pod %s, container %s. Error '%v', stdOut: '%s'\n stdErr: %s", - podName, "ovnkube-node", err, depVerifyOut, depVerifyErr) + return err } - trueFalse := strings.TrimSuffix(depVerifyOut, "\n") - klog.V(10).Infof("Dependency check '%s' in pod '%s', container '%s' yielded '%s'", dependencyCmd, podName, "ovnkube-node", trueFalse) - if trueFalse != "true" { + if verifyOut != "true" { + verifyOut, verifyErr, err := verifyDependency(coreclient, restconfig, podName, ovnNamespace, "pip3", "if type -p pip3 >/dev/null 2>&1; then echo 'true' ; fi") + if err != nil { + return err + } + if verifyOut != "true" { + return fmt.Errorf("ovn-detrace error while verifying dependency pip3 in pod %s, container %s. stdOut: '%s'\n stdErr: %s", podName, + "ovnkube-node", verifyOut, verifyErr) + } installCmd := "pip3 install " + dependency depInstallOut, depInstallErr, err := execInPod(coreclient, restconfig, ovnNamespace, podName, "ovnkube-node", installCmd, "") if err != nil { - return fmt.Errorf("ovn-detrace error in pod %s, container %s. Error '%v', stdOut: '%s'\n stdErr: %s", - podName, "ovnkube-node", err, depInstallOut, depInstallErr) + return fmt.Errorf("ovn-detrace error while installing dependency %s in pod %s, container %s. Error '%v', stdOut: '%s'\n stdErr: %s", + dependency, podName, "ovnkube-node", err, depInstallOut, depInstallErr) + } klog.V(1).Infof("Install ovn-detrace dependencies output: %s\n", depInstallOut) } @@ -910,6 +916,17 @@ func installOvnDetraceDependencies(coreclient *corev1client.CoreV1Client, restco return nil } +func verifyDependency(coreclient *corev1client.CoreV1Client, restconfig *rest.Config, podName, ovnNamespace, dependency, depCheckCommand string) (string, string, error) { + depVerifyOut, depVerifyErr, err := execInPod(coreclient, restconfig, ovnNamespace, podName, "ovnkube-node", depCheckCommand, "") + if err != nil { + return "", "", fmt.Errorf("ovn-detrace error while verifying dependency %s in pod %s, container %s. Error '%v', stdOut: '%s'\n stdErr: %s", + dependency, podName, "ovnkube-node", err, depVerifyOut, depVerifyErr) + } + trueFalse := strings.TrimSuffix(depVerifyOut, "\n") + klog.V(10).Infof("Dependency %s check '%s' in pod '%s', container '%s' yielded '%s'", dependency, depCheckCommand, podName, "ovnkube-node", trueFalse) + return trueFalse, depVerifyErr, nil +} + // runOvnDetrace runs an ovn-detrace command for the given input. // Returns error if dependencies are not met (allows for graceful handling of those issues). 
func runOvnDetrace(coreclient *corev1client.CoreV1Client, restconfig *rest.Config, direction string, srcPodInfo *PodInfo, From 89da20aeda8330be98a53e08b72f3afee71a8bba Mon Sep 17 00:00:00 2001 From: Martin Kennelly Date: Tue, 25 Apr 2023 14:50:03 +0100 Subject: [PATCH 23/90] Add route manager Signed-off-by: Martin Kennelly --- .../node/default_node_network_controller.go | 42 +- go-controller/pkg/node/gateway_init.go | 27 +- .../pkg/node/gateway_init_linux_test.go | 242 +++++---- go-controller/pkg/node/gateway_localnet.go | 5 +- go-controller/pkg/node/gateway_shared_intf.go | 35 +- go-controller/pkg/node/management-port-dpu.go | 12 +- go-controller/pkg/node/management-port.go | 12 +- .../pkg/node/management-port_dpu_test.go | 18 +- .../pkg/node/management-port_linux.go | 42 +- .../pkg/node/management-port_linux_test.go | 80 ++- go-controller/pkg/node/route_manager.go | 503 ++++++++++++++++++ go-controller/pkg/node/route_manager_test.go | 284 ++++++++++ go-controller/pkg/util/net_linux.go | 37 -- go-controller/pkg/util/net_linux_unit_test.go | 176 ------ 14 files changed, 1091 insertions(+), 424 deletions(-) create mode 100644 go-controller/pkg/node/route_manager.go create mode 100644 go-controller/pkg/node/route_manager_test.go diff --git a/go-controller/pkg/node/default_node_network_controller.go b/go-controller/pkg/node/default_node_network_controller.go index 85572a0f4b..039240bcb9 100644 --- a/go-controller/pkg/node/default_node_network_controller.go +++ b/go-controller/pkg/node/default_node_network_controller.go @@ -100,6 +100,7 @@ type DefaultNodeNetworkController struct { // Node healthcheck server for cloud load balancers healthzServer *proxierHealthUpdater + routeManager *routeManager // retry framework for namespaces, used for the removal of stale conntrack entries for external gateways retryNamespaces *retry.RetryFramework @@ -117,6 +118,7 @@ func newDefaultNodeNetworkController(cnnci *CommonNodeNetworkControllerInfo, sto stopChan: stopChan, wg: wg, }, + routeManager: newRouteManager(wg, true, 2*time.Minute), } } @@ -454,7 +456,7 @@ func handleNetdevResources(resourceName string) (string, error) { } func createNodeManagementPorts(name string, nodeAnnotator kube.Annotator, waiter *startupWaiter, - subnets []*net.IPNet) ([]managementPortEntry, *managementPortConfig, error) { + subnets []*net.IPNet, routeManager *routeManager) ([]managementPortEntry, *managementPortConfig, error) { // If netdevice name is not provided in the full mode then management port backed by OVS internal port. 
// If it is provided then it is backed by VF or SF and need to determine its representor name to plug // into OVS integrational bridge @@ -479,7 +481,7 @@ func createNodeManagementPorts(name string, nodeAnnotator kube.Annotator, waiter var mgmtPortConfig *managementPortConfig mgmtPorts := make([]managementPortEntry, 0) for _, port := range ports { - config, err := port.Create(nodeAnnotator, waiter) + config, err := port.Create(routeManager, nodeAnnotator, waiter) if err != nil { return nil, nil, err } @@ -525,6 +527,7 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error { if err := level.Set("5"); err != nil { klog.Errorf("Setting klog \"loglevel\" to 5 failed, err: %v", err) } + go nc.routeManager.run(ctx.Done()) if node, err = nc.Kube.GetNode(nc.name); err != nil { return fmt.Errorf("error retrieving node %s: %v", nc.name, err) @@ -626,7 +629,7 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error { } // Setup management ports - mgmtPorts, mgmtPortConfig, err := createNodeManagementPorts(nc.name, nodeAnnotator, waiter, subnets) + mgmtPorts, mgmtPortConfig, err := createNodeManagementPorts(nc.name, nodeAnnotator, waiter, subnets, nc.routeManager) if err != nil { return err } @@ -682,7 +685,7 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error { (initialTopoVersion >= types.OvnRoutingViaHostTopoVersion) { // Configure route for svc towards shared gw bridge // Have to have the route to bridge for multi-NIC mode, where the default gateway may go to a non-OVS interface - if err := configureSvcRouteViaBridge(bridgeName); err != nil { + if err := configureSvcRouteViaBridge(nc.routeManager, bridgeName); err != nil { return err } needLegacySvcRoute = false @@ -698,17 +701,22 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error { return fmt.Errorf("unable to get link for %s, error: %v", types.K8sMgmtIntfName, err) } var gwIP net.IP + var routes []route for _, subnet := range config.Kubernetes.ServiceCIDRs { if utilnet.IsIPv4CIDR(subnet) { gwIP = mgmtPortConfig.ipv4.gwIP } else { gwIP = mgmtPortConfig.ipv6.gwIP } - err := util.LinkRoutesApply(link, gwIP, []*net.IPNet{subnet}, config.Default.RoutableMTU, nil) - if err != nil { - return fmt.Errorf("unable to add legacy route for services via mp0, error: %v", err) - } + subnet := *subnet + routes = append(routes, route{ + gwIP: gwIP, + subnet: &subnet, + mtu: config.Default.RoutableMTU, + srcIP: nil, + }) } + nc.routeManager.add(routesPerLink{link, routes}) } } } @@ -723,7 +731,7 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error { // migrate service route from ovn-k8s-mp0 to shared gw bridge if (initialTopoVersion < types.OvnHostToSvcOFTopoVersion && config.GatewayModeShared == config.Gateway.Mode) || (initialTopoVersion < types.OvnRoutingViaHostTopoVersion) { - if err := upgradeServiceRoute(bridgeName); err != nil { + if err := upgradeServiceRoute(nc.routeManager, bridgeName); err != nil { klog.Fatalf("Failed to upgrade service route for node, error: %v", err) } } @@ -781,7 +789,7 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error { // start management ports health check for _, mgmtPort := range mgmtPorts { - mgmtPort.port.CheckManagementPortHealth(mgmtPort.config, nc.stopChan) + mgmtPort.port.CheckManagementPortHealth(nc.routeManager, mgmtPort.config, nc.stopChan) // Start the health checking server used by egressip, if EgressIPNodeHealthCheckPort is specified if err := 
nc.startEgressIPHealthCheckingServer(mgmtPort); err != nil { return err @@ -1055,22 +1063,24 @@ func (nc *DefaultNodeNetworkController) validateVTEPInterfaceMTU() error { return nil } -func configureSvcRouteViaBridge(bridge string) error { - return configureSvcRouteViaInterface(bridge, DummyNextHopIPs()) +func configureSvcRouteViaBridge(routeManager *routeManager, bridge string) error { + return configureSvcRouteViaInterface(routeManager, bridge, DummyNextHopIPs()) } -func upgradeServiceRoute(bridgeName string) error { +func upgradeServiceRoute(routeManager *routeManager, bridgeName string) error { klog.Info("Updating K8S Service route") // Flush old routes link, err := util.LinkSetUp(types.K8sMgmtIntfName) if err != nil { return fmt.Errorf("unable to get link: %s, error: %v", types.K8sMgmtIntfName, err) } - if err := util.LinkRoutesDel(link, config.Kubernetes.ServiceCIDRs); err != nil { - return fmt.Errorf("unable to delete routes on upgrade, error: %v", err) + for _, serviceCIDR := range config.Kubernetes.ServiceCIDRs { + serviceCIDR := *serviceCIDR + routeManager.add(routesPerLink{link, []route{{subnet: &serviceCIDR}}}) } + // add route via OVS bridge - if err := configureSvcRouteViaBridge(bridgeName); err != nil { + if err := configureSvcRouteViaBridge(routeManager, bridgeName); err != nil { return fmt.Errorf("unable to add svc route via OVS bridge interface, error: %v", err) } klog.Info("Successfully updated Kubernetes service route towards OVS") diff --git a/go-controller/pkg/node/gateway_init.go b/go-controller/pkg/node/gateway_init.go index 039f512dc5..06f4795403 100644 --- a/go-controller/pkg/node/gateway_init.go +++ b/go-controller/pkg/node/gateway_init.go @@ -278,12 +278,13 @@ func getInterfaceByIP(ip net.IP) (string, error) { } // configureSvcRouteViaInterface routes svc traffic through the provided interface -func configureSvcRouteViaInterface(iface string, gwIPs []net.IP) error { +func configureSvcRouteViaInterface(routeManager *routeManager, iface string, gwIPs []net.IP) error { link, err := util.LinkSetUp(iface) if err != nil { return fmt.Errorf("unable to get link for %s, error: %v", iface, err) } + var routes []route for _, subnet := range config.Kubernetes.ServiceCIDRs { gwIP, err := util.MatchIPFamily(utilnet.IsIPv6CIDR(subnet), gwIPs) if err != nil { @@ -295,11 +296,17 @@ func configureSvcRouteViaInterface(iface string, gwIPs []net.IP) error { if config.Default.RoutableMTU != 0 { mtu = config.Default.RoutableMTU } - - err = util.LinkRoutesApply(link, gwIP[0], []*net.IPNet{subnet}, mtu, nil) - if err != nil { - return fmt.Errorf("unable to add/update route for service via %s for gwIP %s, error: %v", iface, gwIP[0].String(), err) - } + subnetCopy := *subnet + gwIPCopy := gwIP[0] + routes = append(routes, route{ + gwIP: gwIPCopy, + subnet: &subnetCopy, + mtu: mtu, + srcIP: nil, + }) + } + if len(routes) > 0 { + routeManager.add(routesPerLink{link, routes}) } return nil } @@ -354,11 +361,11 @@ func (nc *DefaultNodeNetworkController) initGateway(subnets []*net.IPNet, nodeAn case config.GatewayModeLocal: klog.Info("Preparing Local Gateway") gw, err = newLocalGateway(nc.name, subnets, gatewayNextHops, gatewayIntf, egressGWInterface, ifAddrs, nodeAnnotator, - managementPortConfig, nc.Kube, nc.watchFactory) + managementPortConfig, nc.Kube, nc.watchFactory, nc.routeManager) case config.GatewayModeShared: klog.Info("Preparing Shared Gateway") gw, err = newSharedGateway(nc.name, subnets, gatewayNextHops, gatewayIntf, egressGWInterface, ifAddrs, nodeAnnotator, nc.Kube, - 
managementPortConfig, nc.watchFactory) + managementPortConfig, nc.watchFactory, nc.routeManager) case config.GatewayModeDisabled: var chassisID string klog.Info("Gateway Mode is disabled") @@ -457,11 +464,11 @@ func (nc *DefaultNodeNetworkController) initGatewayDPUHost(kubeNodeIP net.IP) er return fmt.Errorf("failed to set the node masquerade IP on the ext bridge %s: %v", gwIntf, err) } - if err := addMasqueradeRoute(gwIntf, nc.name, ifAddrs, nc.watchFactory); err != nil { + if err := addMasqueradeRoute(nc.routeManager, gwIntf, nc.name, ifAddrs, nc.watchFactory); err != nil { return fmt.Errorf("failed to set the node masquerade route to OVN: %v", err) } - err = configureSvcRouteViaInterface(gatewayIntf, gatewayNextHops) + err = configureSvcRouteViaInterface(nc.routeManager, gatewayIntf, gatewayNextHops) if err != nil { return err } diff --git a/go-controller/pkg/node/gateway_init_linux_test.go b/go-controller/pkg/node/gateway_init_linux_test.go index b4129771b0..5e19a2eeb5 100644 --- a/go-controller/pkg/node/gateway_init_linux_test.go +++ b/go-controller/pkg/node/gateway_init_linux_test.go @@ -8,9 +8,11 @@ import ( "context" "fmt" "net" + "runtime" "strings" "sync" "syscall" + "time" "github.com/k8snetworkplumbingwg/sriovnet" "github.com/stretchr/testify/mock" @@ -249,7 +251,13 @@ func shareGatewayInterfaceTest(app *cli.App, testNS ns.NetNS, Expect(err).NotTo(HaveOccurred()) err = nodeAnnotator.Run() Expect(err).NotTo(HaveOccurred()) - + rm := newRouteManager(wg, true, 10*time.Second) + wg.Add(1) + go testNS.Do(func(netNS ns.NetNS) error { + defer GinkgoRecover() + rm.run(stop) + return nil + }) err = testNS.Do(func(ns.NetNS) error { defer GinkgoRecover() @@ -257,11 +265,10 @@ func shareGatewayInterfaceTest(app *cli.App, testNS ns.NetNS, Expect(err).NotTo(HaveOccurred()) ifAddrs := ovntest.MustParseIPNets(eth0CIDR) sharedGw, err := newSharedGateway(nodeName, ovntest.MustParseIPNets(nodeSubnet), gatewayNextHops, gatewayIntf, "", ifAddrs, nodeAnnotator, k, - &fakeMgmtPortConfig, wf) + &fakeMgmtPortConfig, wf, rm) Expect(err).NotTo(HaveOccurred()) err = sharedGw.Init(wf, stop, wg) Expect(err).NotTo(HaveOccurred()) - err = nodeAnnotator.Run() Expect(err).NotTo(HaveOccurred()) @@ -292,13 +299,19 @@ func shareGatewayInterfaceTest(app *cli.App, testNS ns.NetNS, LinkIndex: l.Attrs().Index, Src: ifAddrs[0].IP, } - route, err := util.LinkRouteGetFilteredRoute( - expRoute, - netlink.RT_FILTER_DST|netlink.RT_FILTER_OIF|netlink.RT_FILTER_SRC, - ) - Expect(err).NotTo(HaveOccurred()) - Expect(route).ToNot(BeNil()) - + Eventually(func() error { + r, err := util.LinkRouteGetFilteredRoute( + expRoute, + netlink.RT_FILTER_DST|netlink.RT_FILTER_OIF|netlink.RT_FILTER_SRC, + ) + if err != nil { + return err + } + if r == nil { + return fmt.Errorf("failed to find route") + } + return nil + }, 1*time.Second).ShouldNot(HaveOccurred()) return nil }) Expect(err).NotTo(HaveOccurred()) @@ -609,14 +622,21 @@ func shareGatewayInterfaceDPUTest(app *cli.App, testNS ns.NetNS, ifAddrs := ovntest.MustParseIPNets(hostCIDR) ifAddrs[0].IP = ovntest.MustParseIP(dpuIP) + rm := newRouteManager(wg, true, 10*time.Second) + wg.Add(1) + go testNS.Do(func(netNS ns.NetNS) error { + defer GinkgoRecover() + rm.run(stop) + return nil + }) + err = testNS.Do(func(ns.NetNS) error { defer GinkgoRecover() gatewayNextHops, gatewayIntf, err := getGatewayNextHops() Expect(err).NotTo(HaveOccurred()) sharedGw, err := newSharedGateway(nodeName, ovntest.MustParseIPNets(nodeSubnet), gatewayNextHops, - gatewayIntf, "", ifAddrs, nodeAnnotator, k, 
&fakeMgmtPortConfig, wf) - + gatewayIntf, "", ifAddrs, nodeAnnotator, k, &fakeMgmtPortConfig, wf, rm) Expect(err).NotTo(HaveOccurred()) err = sharedGw.Init(wf, stop, wg) Expect(err).NotTo(HaveOccurred()) @@ -712,6 +732,13 @@ func shareGatewayInterfaceDPUHostTest(app *cli.App, testNS ns.NetNS, uplinkName, cnnci := NewCommonNodeNetworkControllerInfo(nil, wf, nil, nodeName, false) nc := newDefaultNodeNetworkController(cnnci, stop, wg) + // must run route manager manually which is usually started with nc.Start() + wg.Add(1) + go testNS.Do(func(netNS ns.NetNS) error { + defer GinkgoRecover() + nc.routeManager.run(stop) + return nil + }) err = testNS.Do(func(ns.NetNS) error { defer GinkgoRecover() @@ -728,12 +755,19 @@ func shareGatewayInterfaceDPUHostTest(app *cli.App, testNS ns.NetNS, uplinkName, LinkIndex: link.Attrs().Index, Gw: ovntest.MustParseIP(gwIP), } - route, err := util.LinkRouteGetFilteredRoute( - expRoute, - netlink.RT_FILTER_DST|netlink.RT_FILTER_OIF|netlink.RT_FILTER_GW, - ) - Expect(err).NotTo(HaveOccurred()) - Expect(route).ToNot(BeNil()) + Eventually(func() error { + r, err := util.LinkRouteGetFilteredRoute( + expRoute, + netlink.RT_FILTER_DST|netlink.RT_FILTER_OIF|netlink.RT_FILTER_GW, + ) + if err != nil { + return err + } + if r == nil { + return fmt.Errorf("failed to find route") + } + return nil + }, 1*time.Second).ShouldNot(HaveOccurred()) // check that the masquerade route was added expRoute = &netlink.Route{ @@ -741,13 +775,19 @@ func shareGatewayInterfaceDPUHostTest(app *cli.App, testNS ns.NetNS, uplinkName, LinkIndex: link.Attrs().Index, Src: ovntest.MustParseIP(hostIP), } - route, err = util.LinkRouteGetFilteredRoute( - expRoute, - netlink.RT_FILTER_DST|netlink.RT_FILTER_OIF|netlink.RT_FILTER_SRC, - ) - Expect(err).NotTo(HaveOccurred()) - Expect(route).ToNot(BeNil()) - + Eventually(func() error { + r, err := util.LinkRouteGetFilteredRoute( + expRoute, + netlink.RT_FILTER_DST|netlink.RT_FILTER_OIF|netlink.RT_FILTER_GW, + ) + if err != nil { + return err + } + if r == nil { + return fmt.Errorf("failed to find route") + } + return nil + }, 1*time.Second).ShouldNot(HaveOccurred()) return nil }) Expect(err).NotTo(HaveOccurred()) @@ -1012,7 +1052,13 @@ OFPT_GET_CONFIG_REPLY (xid=0x4): frags=normal miss_send_len=0`, Expect(err).NotTo(HaveOccurred()) err = nodeAnnotator.Run() Expect(err).NotTo(HaveOccurred()) - + rm := newRouteManager(wg, true, 10*time.Second) + wg.Add(1) + go testNS.Do(func(netNS ns.NetNS) error { + defer GinkgoRecover() + rm.run(stop) + return nil + }) err = testNS.Do(func(ns.NetNS) error { defer GinkgoRecover() @@ -1020,7 +1066,7 @@ OFPT_GET_CONFIG_REPLY (xid=0x4): frags=normal miss_send_len=0`, Expect(err).NotTo(HaveOccurred()) ifAddrs := ovntest.MustParseIPNets(eth0CIDR) localGw, err := newLocalGateway(nodeName, ovntest.MustParseIPNets(nodeSubnet), gatewayNextHops, gatewayIntf, "", ifAddrs, - nodeAnnotator, &fakeMgmtPortConfig, k, wf) + nodeAnnotator, &fakeMgmtPortConfig, k, wf, rm) Expect(err).NotTo(HaveOccurred()) err = localGw.Init(wf, stop, wg) Expect(err).NotTo(HaveOccurred()) @@ -1055,13 +1101,19 @@ OFPT_GET_CONFIG_REPLY (xid=0x4): frags=normal miss_send_len=0`, LinkIndex: l.Attrs().Index, Src: ifAddrs[0].IP, } - route, err := util.LinkRouteGetFilteredRoute( - expRoute, - netlink.RT_FILTER_DST|netlink.RT_FILTER_OIF|netlink.RT_FILTER_SRC, - ) - Expect(err).NotTo(HaveOccurred()) - Expect(route).ToNot(BeNil()) - + Eventually(func() error { + r, err := util.LinkRouteGetFilteredRoute( + expRoute, + 
netlink.RT_FILTER_DST|netlink.RT_FILTER_OIF|netlink.RT_FILTER_SRC, + ) + if err != nil { + return err + } + if r == nil { + return fmt.Errorf("failed to find route") + } + return nil + }, 1*time.Second).ShouldNot(HaveOccurred()) return nil }) Expect(err).NotTo(HaveOccurred()) @@ -1166,6 +1218,7 @@ var _ = Describe("Gateway Init Operations", func() { app.Flags = config.Flags var err error + runtime.LockOSThread() testNS, err = testutils.NewNS() Expect(err).NotTo(HaveOccurred()) }) @@ -1173,6 +1226,7 @@ var _ = Describe("Gateway Init Operations", func() { AfterEach(func() { Expect(testNS.Close()).To(Succeed()) Expect(testutils.UnmountNS(testNS)).To(Succeed()) + runtime.UnlockOSThread() }) Context("Setting up the gateway bridge", func() { @@ -1501,8 +1555,16 @@ var _ = Describe("Gateway unit tests", func() { netlinkMock.On("LinkSetUp", mock.Anything).Return(nil) netlinkMock.On("RouteListFiltered", mock.Anything, mock.Anything, mock.Anything).Return(nil, nil) netlinkMock.On("RouteAdd", expectedRoute).Return(nil) - - err = configureSvcRouteViaInterface("ens1f0", gwIPs) + wg := &sync.WaitGroup{} + rm := newRouteManager(wg, true, 10*time.Second) + stopCh := make(chan struct{}) + wg.Add(1) + go rm.run(stopCh) + defer func() { + close(stopCh) + wg.Wait() + }() + err = configureSvcRouteViaInterface(rm, "ens1f0", gwIPs) Expect(err).ToNot(HaveOccurred()) }) @@ -1537,87 +1599,33 @@ var _ = Describe("Gateway unit tests", func() { netlinkMock.On("LinkSetUp", mock.Anything).Return(nil) netlinkMock.On("RouteListFiltered", mock.Anything, mock.Anything, mock.Anything).Return([]netlink.Route{*previousRoute}, nil) netlinkMock.On("RouteReplace", expectedRoute).Return(nil) - - err = configureSvcRouteViaInterface("ens1f0", gwIPs) - Expect(err).ToNot(HaveOccurred()) - }) - - It("Fails if link route list fails", func() { - _, ipnet, err := net.ParseCIDR("10.96.0.0/16") + wg := &sync.WaitGroup{} + rm := newRouteManager(wg, true, 10*time.Second) + stopCh := make(chan struct{}) + go rm.run(stopCh) + wg.Add(1) + defer func() { + close(stopCh) + wg.Wait() + }() + + err = configureSvcRouteViaInterface(rm, "ens1f0", gwIPs) Expect(err).ToNot(HaveOccurred()) - config.Kubernetes.ServiceCIDRs = []*net.IPNet{ipnet} - gwIPs := []net.IP{net.ParseIP("10.0.0.11")} - - lnk := &linkMock.Link{} - lnkAttr := &netlink.LinkAttrs{ - Name: "ens1f0", - Index: 5, - } - lnk.On("Attrs").Return(lnkAttr) - - netlinkMock.On("LinkByName", mock.Anything).Return(lnk, nil) - netlinkMock.On("LinkSetUp", mock.Anything).Return(nil) - netlinkMock.On("RouteListFiltered", mock.Anything, mock.Anything, mock.Anything).Return(nil, fmt.Errorf("failed to list routes")) - - err = configureSvcRouteViaInterface("ens1f0", gwIPs) - Expect(err).To(HaveOccurred()) - }) - - It("Fails if link route add fails", func() { - _, ipnet, err := net.ParseCIDR("10.96.0.0/16") - Expect(err).ToNot(HaveOccurred()) - config.Kubernetes.ServiceCIDRs = []*net.IPNet{ipnet} - gwIPs := []net.IP{net.ParseIP("10.0.0.11")} - - lnk := &linkMock.Link{} - lnkAttr := &netlink.LinkAttrs{ - Name: "ens1f0", - Index: 5, - } - - lnk.On("Attrs").Return(lnkAttr) - netlinkMock.On("LinkByName", mock.Anything).Return(lnk, nil) - netlinkMock.On("LinkSetUp", mock.Anything).Return(nil) - netlinkMock.On("RouteListFiltered", mock.Anything, mock.Anything, mock.Anything).Return(nil, nil) - netlinkMock.On("RouteAdd", mock.Anything).Return(fmt.Errorf("failed to replace route")) - - err = configureSvcRouteViaInterface("ens1f0", gwIPs) - Expect(err).To(HaveOccurred()) - }) - - It("Fails if link route replace 
fails", func() { - _, ipnet, err := net.ParseCIDR("10.96.0.0/16") - Expect(err).ToNot(HaveOccurred()) - config.Kubernetes.ServiceCIDRs = []*net.IPNet{ipnet} - gwIPs := []net.IP{net.ParseIP("10.0.0.11")} - - lnk := &linkMock.Link{} - lnkAttr := &netlink.LinkAttrs{ - Name: "ens1f0", - Index: 5, - } - previousRoute := &netlink.Route{ - Dst: ipnet, - LinkIndex: 5, - Scope: netlink.SCOPE_UNIVERSE, - Gw: gwIPs[0], - MTU: config.Default.MTU - 100, - } - - lnk.On("Attrs").Return(lnkAttr) - netlinkMock.On("LinkByName", mock.Anything).Return(lnk, nil) - netlinkMock.On("LinkSetUp", mock.Anything).Return(nil) - netlinkMock.On("RouteListFiltered", mock.Anything, mock.Anything, mock.Anything).Return([]netlink.Route{*previousRoute}, nil) - netlinkMock.On("RouteReplace", mock.Anything).Return(fmt.Errorf("failed to replace route")) - - err = configureSvcRouteViaInterface("ens1f0", gwIPs) - Expect(err).To(HaveOccurred()) }) It("Fails if link set up fails", func() { netlinkMock.On("LinkByName", mock.Anything).Return(nil, fmt.Errorf("failed to find interface")) gwIPs := []net.IP{net.ParseIP("10.0.0.11")} - err := configureSvcRouteViaInterface("ens1f0", gwIPs) + wg := &sync.WaitGroup{} + rm := newRouteManager(wg, true, 10*time.Second) + stopCh := make(chan struct{}) + go rm.run(stopCh) + wg.Add(1) + defer func() { + close(stopCh) + wg.Wait() + }() + err := configureSvcRouteViaInterface(rm, "ens1f0", gwIPs) Expect(err).To(HaveOccurred()) }) @@ -1629,8 +1637,16 @@ var _ = Describe("Gateway unit tests", func() { gwIPs := []net.IP{net.ParseIP("10.0.0.11")} netlinkMock.On("LinkByName", mock.Anything).Return(nil, nil) netlinkMock.On("LinkSetUp", mock.Anything).Return(nil) - - err = configureSvcRouteViaInterface("ens1f0", gwIPs) + wg := &sync.WaitGroup{} + rm := newRouteManager(wg, true, 10*time.Second) + stopCh := make(chan struct{}) + go rm.run(stopCh) + wg.Add(1) + defer func() { + close(stopCh) + wg.Wait() + }() + err = configureSvcRouteViaInterface(rm, "ens1f0", gwIPs) Expect(err).To(HaveOccurred()) }) }) diff --git a/go-controller/pkg/node/gateway_localnet.go b/go-controller/pkg/node/gateway_localnet.go index 3c9299d639..b8fb36936b 100644 --- a/go-controller/pkg/node/gateway_localnet.go +++ b/go-controller/pkg/node/gateway_localnet.go @@ -19,7 +19,8 @@ import ( ) func newLocalGateway(nodeName string, hostSubnets []*net.IPNet, gwNextHops []net.IP, gwIntf, egressGWIntf string, gwIPs []*net.IPNet, - nodeAnnotator kube.Annotator, cfg *managementPortConfig, kube kube.Interface, watchFactory factory.NodeWatchFactory) (*gateway, error) { + nodeAnnotator kube.Annotator, cfg *managementPortConfig, kube kube.Interface, watchFactory factory.NodeWatchFactory, + routeManager *routeManager) (*gateway, error) { klog.Info("Creating new local gateway") gw := &gateway{} @@ -95,7 +96,7 @@ func newLocalGateway(nodeName string, hostSubnets []*net.IPNet, gwNextHops []net return fmt.Errorf("failed to set the node masquerade IP on the ext bridge %s: %v", gwBridge.bridgeName, err) } - if err := addMasqueradeRoute(gwBridge.bridgeName, nodeName, gwIPs, watchFactory); err != nil { + if err := addMasqueradeRoute(routeManager, gwBridge.bridgeName, nodeName, gwIPs, watchFactory); err != nil { return fmt.Errorf("failed to set the node masquerade route to OVN: %v", err) } diff --git a/go-controller/pkg/node/gateway_shared_intf.go b/go-controller/pkg/node/gateway_shared_intf.go index abadbe68fe..b2220a0e36 100644 --- a/go-controller/pkg/node/gateway_shared_intf.go +++ b/go-controller/pkg/node/gateway_shared_intf.go @@ -1693,7 +1693,8 @@ func 
initSvcViaMgmPortRoutingRules(hostSubnets []*net.IPNet) error { } func newSharedGateway(nodeName string, subnets []*net.IPNet, gwNextHops []net.IP, gwIntf, egressGWIntf string, - gwIPs []*net.IPNet, nodeAnnotator kube.Annotator, kube kube.Interface, cfg *managementPortConfig, watchFactory factory.NodeWatchFactory) (*gateway, error) { + gwIPs []*net.IPNet, nodeAnnotator kube.Annotator, kube kube.Interface, cfg *managementPortConfig, + watchFactory factory.NodeWatchFactory, routeManager *routeManager) (*gateway, error) { klog.Info("Creating new shared gateway") gw := &gateway{} @@ -1754,7 +1755,7 @@ func newSharedGateway(nodeName string, subnets []*net.IPNet, gwNextHops []net.IP return fmt.Errorf("failed to set the node masquerade IP on the ext bridge %s: %v", gwBridge.bridgeName, err) } - if err := addMasqueradeRoute(gwBridge.bridgeName, nodeName, gwIPs, watchFactory); err != nil { + if err := addMasqueradeRoute(routeManager, gwBridge.bridgeName, nodeName, gwIPs, watchFactory); err != nil { return fmt.Errorf("failed to set the node masquerade route to OVN: %v", err) } } @@ -1932,7 +1933,7 @@ func svcToCookie(namespace string, name string, token string, port int32) (strin return fmt.Sprintf("0x%x", h.Sum64()), nil } -func addMasqueradeRoute(netIfaceName, nodeName string, ifAddrs []*net.IPNet, watchFactory factory.NodeWatchFactory) error { +func addMasqueradeRoute(routeManager *routeManager, netIfaceName, nodeName string, ifAddrs []*net.IPNet, watchFactory factory.NodeWatchFactory) error { var ipv4, ipv6 net.IP findIPs := func(ips []net.IP) error { var err error @@ -1991,23 +1992,33 @@ func addMasqueradeRoute(netIfaceName, nodeName string, ifAddrs []*net.IPNet, wat if err != nil { return fmt.Errorf("unable to find shared gw bridge interface: %s", netIfaceName) } - + mtu := 0 + var routes []route if ipv4 != nil { _, masqIPNet, _ := net.ParseCIDR(fmt.Sprintf("%s/32", types.V4OVNMasqueradeIP)) klog.Infof("Setting OVN Masquerade route with source: %s", ipv4) - err = util.LinkRoutesApply(netIfaceLink, nil, []*net.IPNet{masqIPNet}, 0, ipv4) - if err != nil { - return fmt.Errorf("unable to add OVN masquerade route to host, error: %v", err) - } + + routes = append(routes, route{ + gwIP: nil, + subnet: masqIPNet, + mtu: mtu, + srcIP: ipv4, + }) } if ipv6 != nil { _, masqIPNet, _ := net.ParseCIDR(fmt.Sprintf("%s/128", types.V6OVNMasqueradeIP)) klog.Infof("Setting OVN Masquerade route with source: %s", ipv6) - err = util.LinkRoutesApply(netIfaceLink, nil, []*net.IPNet{masqIPNet}, 0, ipv6) - if err != nil { - return fmt.Errorf("unable to add OVN masquerade route to host, error: %v", err) - } + + routes = append(routes, route{ + gwIP: nil, + subnet: masqIPNet, + mtu: mtu, + srcIP: ipv6, + }) + } + if len(routes) > 0 { + routeManager.add(routesPerLink{netIfaceLink, routes}) } return nil diff --git a/go-controller/pkg/node/management-port-dpu.go b/go-controller/pkg/node/management-port-dpu.go index 87d21505ae..638e1e60db 100644 --- a/go-controller/pkg/node/management-port-dpu.go +++ b/go-controller/pkg/node/management-port-dpu.go @@ -37,7 +37,7 @@ func newManagementPortRepresentor(nodeName string, hostSubnets []*net.IPNet) Man } } -func (mp *managementPortRepresentor) Create(nodeAnnotator kube.Annotator, waiter *startupWaiter) (*managementPortConfig, error) { +func (mp *managementPortRepresentor) Create(_ *routeManager, nodeAnnotator kube.Annotator, waiter *startupWaiter) (*managementPortConfig, error) { k8sMgmtIntfName := types.K8sMgmtIntfName if config.OvnKubeNode.MgmtPortRepresentor != "" { 
k8sMgmtIntfName += "_0" @@ -152,7 +152,7 @@ func (mp *managementPortRepresentor) checkRepresentorPortHealth(cfg *managementP } } -func (mp *managementPortRepresentor) CheckManagementPortHealth(cfg *managementPortConfig, stopChan chan struct{}) { +func (mp *managementPortRepresentor) CheckManagementPortHealth(_ *routeManager, cfg *managementPortConfig, stopChan chan struct{}) { go wait.Until( func() { mp.checkRepresentorPortHealth(cfg) @@ -179,7 +179,7 @@ func newManagementPortNetdev(hostSubnets []*net.IPNet) ManagementPort { } } -func (mp *managementPortNetdev) Create(nodeAnnotator kube.Annotator, waiter *startupWaiter) (*managementPortConfig, error) { +func (mp *managementPortNetdev) Create(routeManager *routeManager, nodeAnnotator kube.Annotator, waiter *startupWaiter) (*managementPortConfig, error) { klog.Infof("Lookup netdevice link and existing management port") // get netdev that is used for management port. link, err := util.GetNetLinkOps().LinkByName(mp.netdevName) @@ -256,17 +256,17 @@ func (mp *managementPortNetdev) Create(nodeAnnotator kube.Annotator, waiter *sta } // Setup Iptable and routes - cfg, err := createPlatformManagementPort(types.K8sMgmtIntfName, mp.hostSubnets) + cfg, err := createPlatformManagementPort(routeManager, types.K8sMgmtIntfName, mp.hostSubnets) if err != nil { return nil, err } return cfg, nil } -func (mp *managementPortNetdev) CheckManagementPortHealth(cfg *managementPortConfig, stopChan chan struct{}) { +func (mp *managementPortNetdev) CheckManagementPortHealth(routeManager *routeManager, cfg *managementPortConfig, stopChan chan struct{}) { go wait.Until( func() { - checkManagementPortHealth(cfg) + checkManagementPortHealth(routeManager, cfg) }, 30*time.Second, stopChan) diff --git a/go-controller/pkg/node/management-port.go b/go-controller/pkg/node/management-port.go index a2125bb286..a5c98aba47 100644 --- a/go-controller/pkg/node/management-port.go +++ b/go-controller/pkg/node/management-port.go @@ -20,10 +20,10 @@ import ( type ManagementPort interface { // Create Management port, use annotator to update node annotation with management port details // and waiter to set up condition to wait on for management port creation - Create(nodeAnnotator kube.Annotator, waiter *startupWaiter) (*managementPortConfig, error) + Create(routeManager *routeManager, nodeAnnotator kube.Annotator, waiter *startupWaiter) (*managementPortConfig, error) // CheckManagementPortHealth checks periodically for management port health until stopChan is posted // or closed and reports any warnings/errors to log - CheckManagementPortHealth(cfg *managementPortConfig, stopChan chan struct{}) + CheckManagementPortHealth(routeManager *routeManager, cfg *managementPortConfig, stopChan chan struct{}) // Currently, the management port(s) that doesn't have an assignable IP address are the following cases: // - Full mode with HW backed device (e.g. Virtual Function Representor). // - DPU mode with Virtual Function Representor. 
@@ -75,7 +75,7 @@ func newManagementPort(nodeName string, hostSubnets []*net.IPNet) ManagementPort } } -func (mp *managementPort) Create(nodeAnnotator kube.Annotator, waiter *startupWaiter) (*managementPortConfig, error) { +func (mp *managementPort) Create(routeManager *routeManager, nodeAnnotator kube.Annotator, waiter *startupWaiter) (*managementPortConfig, error) { for _, mgmtPortName := range []string{types.K8sMgmtIntfName, types.K8sMgmtIntfName + "_0"} { if err := syncMgmtPortInterface(mp.hostSubnets, mgmtPortName, true); err != nil { return nil, fmt.Errorf("failed to sync management port: %v", err) @@ -108,7 +108,7 @@ func (mp *managementPort) Create(nodeAnnotator kube.Annotator, waiter *startupWa return nil, err } - cfg, err := createPlatformManagementPort(types.K8sMgmtIntfName, mp.hostSubnets) + cfg, err := createPlatformManagementPort(routeManager, types.K8sMgmtIntfName, mp.hostSubnets) if err != nil { return nil, err } @@ -121,10 +121,10 @@ func (mp *managementPort) Create(nodeAnnotator kube.Annotator, waiter *startupWa return cfg, nil } -func (mp *managementPort) CheckManagementPortHealth(cfg *managementPortConfig, stopChan chan struct{}) { +func (mp *managementPort) CheckManagementPortHealth(routeManager *routeManager, cfg *managementPortConfig, stopChan chan struct{}) { go wait.Until( func() { - checkManagementPortHealth(cfg) + checkManagementPortHealth(routeManager, cfg) }, 30*time.Second, stopChan) diff --git a/go-controller/pkg/node/management-port_dpu_test.go b/go-controller/pkg/node/management-port_dpu_test.go index 3c89748daa..f933327fd9 100644 --- a/go-controller/pkg/node/management-port_dpu_test.go +++ b/go-controller/pkg/node/management-port_dpu_test.go @@ -61,7 +61,7 @@ var _ = Describe("Mananagement port DPU tests", func() { netlinkOpsMock.On("LinkByName", "non-existent-netdev").Return(nil, fmt.Errorf("netlink mock error")) netlinkOpsMock.On("IsLinkNotFoundError", mock.Anything).Return(false) - _, err := mgmtPortDpu.Create(nodeAnnotatorMock, waiter) + _, err := mgmtPortDpu.Create(nil, nodeAnnotatorMock, waiter) Expect(err).To(HaveOccurred()) }) @@ -75,7 +75,7 @@ var _ = Describe("Mananagement port DPU tests", func() { nil, fmt.Errorf("failed to get interface")) netlinkOpsMock.On("IsLinkNotFoundError", mock.Anything).Return(true) - _, err := mgmtPortDpu.Create(nodeAnnotatorMock, waiter) + _, err := mgmtPortDpu.Create(nil, nodeAnnotatorMock, waiter) Expect(err).To(HaveOccurred()) }) @@ -95,7 +95,7 @@ var _ = Describe("Mananagement port DPU tests", func() { netlinkOpsMock.On("LinkSetName", linkMock, types.K8sMgmtIntfName).Return(fmt.Errorf("failed to set name")) mockOVSListInterfaceMgmtPortNotExistCmd(execMock, types.K8sMgmtIntfName) - _, err := mgmtPortDpu.Create(nodeAnnotatorMock, waiter) + _, err := mgmtPortDpu.Create(nil, nodeAnnotatorMock, waiter) Expect(err).To(HaveOccurred()) }) @@ -127,7 +127,7 @@ var _ = Describe("Mananagement port DPU tests", func() { Cmd: genOVSAddMgmtPortCmd(mgmtPortDpu.nodeName, mgmtPortDpu.repName), }) - mpcfg, err := mgmtPortDpu.Create(nodeAnnotatorMock, waiter) + mpcfg, err := mgmtPortDpu.Create(nil, nodeAnnotatorMock, waiter) Expect(execMock.CalledMatchesExpected()).To(BeTrue(), execMock.ErrorDesc) Expect(err).ToNot(HaveOccurred()) Expect(mpcfg.ifName).To(Equal(types.K8sMgmtIntfName)) @@ -158,7 +158,7 @@ var _ = Describe("Mananagement port DPU tests", func() { Cmd: genOVSAddMgmtPortCmd(mgmtPortDpu.nodeName, mgmtPortDpu.repName), }) - mpcfg, err := mgmtPortDpu.Create(nodeAnnotatorMock, waiter) + mpcfg, err := mgmtPortDpu.Create(nil, 
nodeAnnotatorMock, waiter) Expect(execMock.CalledMatchesExpected()).To(BeTrue(), execMock.ErrorDesc) Expect(err).ToNot(HaveOccurred()) Expect(mpcfg.ifName).To(Equal(types.K8sMgmtIntfName)) @@ -174,7 +174,7 @@ var _ = Describe("Mananagement port DPU tests", func() { netlinkOpsMock.On("LinkByName", "non-existent-netdev").Return(nil, fmt.Errorf("netlink mock error")) netlinkOpsMock.On("IsLinkNotFoundError", mock.Anything).Return(false) - _, err := mgmtPortDpuHost.Create(nil, waiter) + _, err := mgmtPortDpuHost.Create(nil, nil, waiter) Expect(err).To(HaveOccurred()) }) @@ -188,7 +188,7 @@ var _ = Describe("Mananagement port DPU tests", func() { nil, fmt.Errorf("failed to get interface")) netlinkOpsMock.On("IsLinkNotFoundError", mock.Anything).Return(true) - _, err := mgmtPortDpuHost.Create(nil, waiter) + _, err := mgmtPortDpuHost.Create(nil, nil, waiter) Expect(err).To(HaveOccurred()) }) @@ -225,7 +225,7 @@ var _ = Describe("Mananagement port DPU tests", func() { netlinkOpsMock.On("LinkByName", mock.Anything).Return(nil, fmt.Errorf( "createPlatformManagementPort error")) - _, err = mgmtPortDpuHost.Create(nil, nil) + _, err = mgmtPortDpuHost.Create(nil, nil, nil) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring("createPlatformManagementPort error")) }) @@ -261,7 +261,7 @@ var _ = Describe("Mananagement port DPU tests", func() { netlinkOpsMock.On("LinkByName", mock.Anything).Return(nil, fmt.Errorf( "createPlatformManagementPort error")).Once() - _, err = mgmtPortDpuHost.Create(nil, nil) + _, err = mgmtPortDpuHost.Create(nil, nil, nil) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring( "createPlatformManagementPort error")) diff --git a/go-controller/pkg/node/management-port_linux.go b/go-controller/pkg/node/management-port_linux.go index 44e631d00c..561c5461d3 100644 --- a/go-controller/pkg/node/management-port_linux.go +++ b/go-controller/pkg/node/management-port_linux.go @@ -174,7 +174,7 @@ func tearDownManagementPortConfig(mpcfg *managementPortConfig) error { return tearDownInterfaceIPConfig(mpcfg.link, ipt4, ipt6) } -func setupManagementPortIPFamilyConfig(mpcfg *managementPortConfig, cfg *managementPortIPFamilyConfig) ([]string, error) { +func setupManagementPortIPFamilyConfig(routeManager *routeManager, mpcfg *managementPortConfig, cfg *managementPortIPFamilyConfig) ([]string, error) { var warnings []string var err error var exists bool @@ -190,20 +190,28 @@ func setupManagementPortIPFamilyConfig(mpcfg *managementPortConfig, cfg *managem return warnings, err } + var routes []route for _, subnet := range cfg.allSubnets { - if exists, err = util.LinkRouteExists(mpcfg.link, cfg.gwIP, subnet); err == nil && !exists { - // we need to warn so that it can be debugged as to why routes are disappearing - warnings = append(warnings, fmt.Sprintf("missing route entry for subnet %s via gateway %s on link %v", - subnet, cfg.gwIP, mpcfg.ifName)) - } + exists, err = util.LinkRouteExists(mpcfg.link, cfg.gwIP, subnet) if err != nil { return warnings, err } - - err = util.LinkRoutesApply(mpcfg.link, cfg.gwIP, []*net.IPNet{subnet}, config.Default.RoutableMTU, nil) - if err != nil { - return warnings, err + if exists { + continue } + // we need to warn so that it can be debugged as to why routes are disappearing + warnings = append(warnings, fmt.Sprintf("missing route entry for subnet %s via gateway %s on link %v", + subnet, cfg.gwIP, mpcfg.ifName)) + subnetCopy := *subnet + routes = append(routes, route{ + gwIP: cfg.gwIP, + subnet: &subnetCopy, + mtu: 
config.Default.RoutableMTU, + srcIP: nil, + }) + } + if len(routes) > 0 { + routeManager.add(routesPerLink{mpcfg.link, routes}) } // Add a neighbour entry on the K8s node to map routerIP with routerMAC. This is @@ -275,16 +283,16 @@ func setupManagementPortIPFamilyConfig(mpcfg *managementPortConfig, cfg *managem return warnings, nil } -func setupManagementPortConfig(cfg *managementPortConfig) ([]string, error) { +func setupManagementPortConfig(routeManager *routeManager, cfg *managementPortConfig) ([]string, error) { var warnings, allWarnings []string var err error if cfg.ipv4 != nil { - warnings, err = setupManagementPortIPFamilyConfig(cfg, cfg.ipv4) + warnings, err = setupManagementPortIPFamilyConfig(routeManager, cfg, cfg.ipv4) allWarnings = append(allWarnings, warnings...) } if cfg.ipv6 != nil && err == nil { - warnings, err = setupManagementPortIPFamilyConfig(cfg, cfg.ipv6) + warnings, err = setupManagementPortIPFamilyConfig(routeManager, cfg, cfg.ipv6) allWarnings = append(allWarnings, warnings...) } @@ -294,7 +302,7 @@ func setupManagementPortConfig(cfg *managementPortConfig) ([]string, error) { // createPlatformManagementPort creates a management port attached to the node switch // that lets the node access its pods via their private IP address. This is used // for health checking and other management tasks. -func createPlatformManagementPort(interfaceName string, localSubnets []*net.IPNet) (*managementPortConfig, error) { +func createPlatformManagementPort(routeManager *routeManager, interfaceName string, localSubnets []*net.IPNet) (*managementPortConfig, error) { var cfg *managementPortConfig var err error @@ -306,7 +314,7 @@ func createPlatformManagementPort(interfaceName string, localSubnets []*net.IPNe return nil, err } - if _, err = setupManagementPortConfig(cfg); err != nil { + if _, err = setupManagementPortConfig(routeManager, cfg); err != nil { return nil, err } @@ -476,8 +484,8 @@ func DelMgtPortIptRules() { // 1. route entries to cluster CIDR and service CIDR through management port // 2. ARP entry for the node subnet's gateway ip // 3. 
IPtables chain and rule for SNATing packets entering the logical topology
-func checkManagementPortHealth(cfg *managementPortConfig) {
-	warnings, err := setupManagementPortConfig(cfg)
+func checkManagementPortHealth(routeManager *routeManager, cfg *managementPortConfig) {
+	warnings, err := setupManagementPortConfig(routeManager, cfg)
 	for _, warning := range warnings {
 		klog.Warningf(warning)
 	}
diff --git a/go-controller/pkg/node/management-port_linux_test.go b/go-controller/pkg/node/management-port_linux_test.go
index ddc0b01de7..c13a95a512 100644
--- a/go-controller/pkg/node/management-port_linux_test.go
+++ b/go-controller/pkg/node/management-port_linux_test.go
@@ -12,6 +12,8 @@ import (
 	"os"
 	"path/filepath"
 	"strings"
+	"sync"
+	"time"
 
 	"github.com/containernetworking/plugins/pkg/ns"
 	"github.com/containernetworking/plugins/pkg/testutils"
@@ -21,6 +23,7 @@ import (
 	"github.com/vishvananda/netlink"
 
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config"
+	egressfirewallfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1/apis/clientset/versioned/fake"
 	egressipv1fake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1/apis/clientset/versioned/fake"
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube"
 	ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing"
@@ -32,8 +35,6 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/client-go/kubernetes/fake"
 
-	egressfirewallfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1/apis/clientset/versioned/fake"
-
 	. "github.com/onsi/ginkgo"
 	. "github.com/onsi/gomega"
 )
@@ -149,20 +150,26 @@ func checkMgmtTestPortIpsAndRoutes(configs []managementPortTestConfig, mgmtPortN
 			gatewayIP := ovntest.MustParseIP(cfg.expectedGatewayIP)
 			subnets := []string{cfg.clusterCIDR}
 			for _, subnet := range subnets {
-				foundRoute := false
 				dstIPnet := ovntest.MustParseIPNet(subnet)
 				route := &netlink.Route{Dst: dstIPnet}
 				filterMask := netlink.RT_FILTER_DST
-				routes, err := netlink.RouteListFiltered(cfg.family, route, filterMask)
-				Expect(err).NotTo(HaveOccurred())
-				for _, r := range routes {
-					if r.Gw.Equal(gatewayIP) && r.LinkIndex == mgmtPortLink.Attrs().Index {
-						foundRoute = true
-						break
+				Eventually(func() error {
+					foundRoute := false
+					routes, err := netlink.RouteListFiltered(cfg.family, route, filterMask)
+					if err != nil {
+						return err
 					}
-				}
-				Expect(foundRoute).To(BeTrue(), "did not find expected route to %s", subnet)
-				foundRoute = false
+					for _, r := range routes {
+						if r.Gw.Equal(gatewayIP) && r.LinkIndex == mgmtPortLink.Attrs().Index {
+							foundRoute = true
+							break
+						}
+					}
+					if !foundRoute {
+						return fmt.Errorf("did not find expected route to %s", subnet)
+					}
+					return nil
+				}, 1*time.Second).ShouldNot(HaveOccurred())
 				j++
 			}
 			Expect(j).To(Equal(1))
@@ -258,12 +265,24 @@ func testManagementPort(ctx *cli.Context, fexec *ovntest.FakeExec, testNS ns.Net
 	nodeAnnotator := kube.NewNodeAnnotator(&kube.KubeOVN{Kube: kube.Kube{KClient: fakeClient}, EIPClient: egressipv1fake.NewSimpleClientset(), EgressFirewallClient: &egressfirewallfake.Clientset{}}, existingNode.Name)
 	waiter := newStartupWaiter()
-
-	err = testNS.Do(func(ns.NetNS) error {
+	mgmtPorts := NewManagementPorts(nodeName, nodeSubnetCIDRs)
+	wg := &sync.WaitGroup{}
+	rm := newRouteManager(wg, true, 10*time.Second)
+	stopCh := make(chan struct{})
+	defer func() {
+		close(stopCh)
+		wg.Wait()
+	}()
+
+	go testNS.Do(func(netNS ns.NetNS) error {
 		defer GinkgoRecover()
+		wg.Add(1)
+		rm.run(stopCh)
+		return nil
+	})
 
-		mgmtPorts := NewManagementPorts(nodeName, nodeSubnetCIDRs)
-		_, err = mgmtPorts[0].Create(nodeAnnotator, waiter)
+	err = testNS.Do(func(ns.NetNS) error {
+		_, err = mgmtPorts[0].Create(rm, nodeAnnotator, waiter)
 		Expect(err).NotTo(HaveOccurred())
 		checkMgmtTestPortIpsAndRoutes(configs, mgtPort, mgtPortAddrs, expectedLRPMAC)
 		return nil
@@ -335,12 +354,20 @@ func testManagementPortDPU(ctx *cli.Context, fexec *ovntest.FakeExec, testNS ns.
 	nodeAnnotator := kube.NewNodeAnnotator(&kube.KubeOVN{Kube: kube.Kube{KClient: fakeClient}, EIPClient: egressipv1fake.NewSimpleClientset(), EgressFirewallClient: &egressfirewallfake.Clientset{}}, existingNode.Name)
 	waiter := newStartupWaiter()
+	wg := &sync.WaitGroup{}
+	rm := newRouteManager(wg, true, 10*time.Second)
+	stopCh := make(chan struct{})
+	go rm.run(stopCh)
+	wg.Add(1)
+	defer func() {
+		close(stopCh)
+		wg.Wait()
+	}()
 
 	err = testNS.Do(func(ns.NetNS) error {
 		defer GinkgoRecover()
-
 		mgmtPorts := NewManagementPorts(nodeName, nodeSubnetCIDRs)
-		_, err = mgmtPorts[0].Create(nodeAnnotator, waiter)
+		_, err = mgmtPorts[0].Create(rm, nodeAnnotator, waiter)
 		Expect(err).NotTo(HaveOccurred())
 		// make sure interface was renamed and mtu was set
 		l, err := netlink.LinkByName(mgtPort)
@@ -405,12 +432,25 @@ func testManagementPortDPUHost(ctx *cli.Context, fexec *ovntest.FakeExec, testNS
 	_, err = config.InitConfig(ctx, fexec, nil)
 	Expect(err).NotTo(HaveOccurred())
-
+	wg := &sync.WaitGroup{}
+	rm := newRouteManager(wg, true, 10*time.Second)
+	stopCh := make(chan struct{})
+	go testNS.Do(func(netNS ns.NetNS) error {
+		defer GinkgoRecover()
+		wg.Add(1)
+		rm.run(stopCh)
+		return nil
+	})
+	defer func() {
+		close(stopCh)
+		wg.Wait()
+	}()
 	err = testNS.Do(func(ns.NetNS) error {
 		defer GinkgoRecover()
 		mgmtPorts := NewManagementPorts(nodeName, nodeSubnetCIDRs)
-		_, err = mgmtPorts[0].Create(nil, nil)
+
+		_, err = mgmtPorts[0].Create(rm, nil, nil)
 		Expect(err).NotTo(HaveOccurred())
 		checkMgmtTestPortIpsAndRoutes(configs, mgtPort, mgtPortAddrs, expectedLRPMAC)
 		// check mgmt port MAC, mtu and link state
diff --git a/go-controller/pkg/node/route_manager.go b/go-controller/pkg/node/route_manager.go
new file mode 100644
index 0000000000..346c273a8b
--- /dev/null
+++ b/go-controller/pkg/node/route_manager.go
@@ -0,0 +1,503 @@
+package node
+
+import (
+	"fmt"
+	"net"
+	"sync"
+	"time"
+
+	"github.com/vishvananda/netlink"
+	"github.com/vishvananda/netlink/nl"
+	"golang.org/x/sys/unix"
+	"k8s.io/klog/v2"
+	utilnet "k8s.io/utils/net"
+)
+
+type routeManager struct {
+	// log all route netlink events received
+	logRouteChanges bool
+	// period for which we want to check that the routes we manage are applied. This is needed for the rare case
+	// we miss a route event.
+	syncPeriod time.Duration
+	store      map[string]routesPerLink // key is link name
+	addRouteCh chan routesPerLink
+	delRouteCh chan routesPerLink
+	wg         *sync.WaitGroup
+}
+
+// newRouteManager returns a routeManager that manages the addition and deletion of routes, and restores
+// managed routes if they are removed behind its back.
+// Begin managing routes by calling run() to start the manager.
+// Routes should be added via add(route) and deleted via del(route) only.
+// All other functions are used internally.
+func newRouteManager(wg *sync.WaitGroup, logRouteChanges bool, syncPeriod time.Duration) *routeManager { + return &routeManager{ + logRouteChanges: logRouteChanges, + syncPeriod: syncPeriod, + store: make(map[string]routesPerLink), + addRouteCh: make(chan routesPerLink, 5), + delRouteCh: make(chan routesPerLink, 5), + wg: wg, + } +} + +func (rm *routeManager) run(stopCh <-chan struct{}) { + var err error + var subscribed bool + var routeEventCh chan netlink.RouteUpdate + subscribed, routeEventCh = subscribeNetlinkRouteEvents(stopCh) + ticker := time.NewTicker(rm.syncPeriod) + defer ticker.Stop() + defer rm.wg.Done() + + for { + select { + case <-stopCh: + // continue existing behaviour of not cleaning up routes upon exit + return + case newRouteEvent, ok := <-routeEventCh: + if !ok { + klog.Info("Route Manager: failed to read netlink route event - resubscribing") + subscribed, routeEventCh = subscribeNetlinkRouteEvents(stopCh) + continue + } + if err = rm.processNetlinkEvent(newRouteEvent); err != nil { + klog.Errorf("Route Manager: failed to process route update event (%s): %v", newRouteEvent.String(), err) + } + case <-ticker.C: + if !subscribed { + klog.Info("Route Manager: netlink route events aren't subscribed - resubscribing") + subscribed, routeEventCh = subscribeNetlinkRouteEvents(stopCh) + } + rm.sync() + case newRoute := <-rm.addRouteCh: + if err = rm.addRoutesPerLink(newRoute); err != nil { + klog.Errorf("Route Manager: failed to add route (%s): %v", newRoute.String(), err) + } + case delRoute := <-rm.delRouteCh: + if err = rm.delRoutesPerLink(delRoute); err != nil { + klog.Errorf("Route Manager: failed to delete route (%s): %v", delRoute.String(), err) + } + } + } +} + +func (rm *routeManager) add(rl routesPerLink) { + rm.addRouteCh <- rl +} + +func (rm *routeManager) del(rl routesPerLink) { + rm.delRouteCh <- rl +} + +func (rm *routeManager) addRoutesPerLink(rl routesPerLink) error { + klog.Infof("Route Manager: attempting to add routes for link: %s", rl.String()) + if err := rl.validate(); err != nil { + return fmt.Errorf("failed to validate addition of new routes for link (%s): %v", rl.String(), err) + } + if err := rm.addRoutesPerLinkStore(rl); err != nil { + return fmt.Errorf("failed to add route for link to store: %w", err) + } + if err := rm.applyRoutesPerLink(rl); err != nil { + return fmt.Errorf("failed to apply route for link: %v", err) + } + klog.Infof("Route Manager: completed adding route: %s", rl.String()) + return nil +} + +func (rm *routeManager) delRoutesPerLink(rl routesPerLink) error { + klog.Infof("Route Manager: attempting to delete routes for link: %s", rl.String()) + if err := rl.validate(); err != nil { + return fmt.Errorf("failed to validate route for link (%s): %v", rl.String(), err) + } + var deletedRoutes []route + for _, r := range rl.routes { + if err := rm.netlinkDelRoute(rl.link, r.subnet); err != nil { + return err + } + deletedRoutes = append(deletedRoutes, r) + } + infName, err := rl.getLinkName() + if err != nil { + return fmt.Errorf("failed to delete route (%+v) because we failed to get link name: %v", + rl, err) + } + routesPerLinkFound, ok := rm.store[infName] + if !ok { + routesPerLinkFound = routesPerLink{rl.link, []route{}} + } + if len(deletedRoutes) > 0 { + routesPerLinkFound.delRoutes(deletedRoutes) + } + if len(routesPerLinkFound.routes) == 0 { + delete(rm.store, infName) + } else { + rm.store[infName] = routesPerLinkFound + } + klog.Infof("Route Manager: deletion of routes for link complete: %s", rl.String()) + return nil +} + +// 
processNetlinkEvent will log new and deleted routes if logRouteChanges is true. It will also check if a deleted route
+// is managed by route manager and if so, determine if a sync is needed to restore any managed routes.
+func (rm *routeManager) processNetlinkEvent(ru netlink.RouteUpdate) error {
+	if ru.Type == unix.RTM_NEWROUTE {
+		// An event resulting from `ip route change` will be seen as type RTM_NEWROUTE event and therefore this function will only
+		// log the changes and not attempt to restore the change. This will be accomplished by the sync function.
+		if rm.logRouteChanges {
+			klog.Infof("Route Manager: netlink route addition event: %q", ru.String())
+		}
+		return nil
+	}
+	if ru.Type != unix.RTM_DELROUTE {
+		return nil
+	}
+	if rm.logRouteChanges {
+		klog.Infof("Route Manager: netlink route deletion event: %q", ru.String())
+	}
+	rlEvent, err := convertRouteUpdateToRoutesPerLink(ru)
+	if err != nil {
+		return fmt.Errorf("failed to convert netlink event to routesPerLink: %v", err)
+	}
+	infName, err := rlEvent.getLinkName()
+	if err != nil {
+		return fmt.Errorf("failed to get link name: %v", err)
+	}
+	rl, ok := rm.store[infName]
+	if !ok {
+		// we don't manage this interface
+		return nil
+	}
+
+	var syncNeeded bool
+	var syncReason string
+	for _, managedRoute := range rl.routes {
+		for _, routeEvent := range rlEvent.routes {
+			if managedRoute.equal(routeEvent) {
+				syncNeeded = true
+				syncReason = fmt.Sprintf("managed route was modified: %s", managedRoute.string())
+			}
+		}
+	}
+	if syncNeeded {
+		klog.Infof("Route Manager: sync required for routes associated with link %q. Reason: %s", infName, syncReason)
+		if err = rm.applyRoutesPerLink(rl); err != nil {
+			klog.Errorf("Route Manager: failed to apply route for link (%s): %v", rl.String(), err)
+		}
+	}
+	return nil
+}
+
+func (rm *routeManager) applyRoutesPerLink(rl routesPerLink) error {
+	for _, r := range rl.routes {
+		if err := rm.applyRoute(rl.link, r.gwIP, r.subnet, r.mtu, r.srcIP); err != nil {
+			return fmt.Errorf("failed to apply route (%s) because of error: %v", r.string(), err)
+		}
+	}
+	return nil
+}
+
+func (rm *routeManager) applyRoute(link netlink.Link, gwIP net.IP, subnet *net.IPNet, mtu int, src net.IP) error {
+	filterRoute, filterMask := filterRouteByDst(link, subnet)
+	nlRoutes, err := netlink.RouteListFiltered(getNetlinkIPFamily(gwIP), filterRoute, filterMask)
+	if err != nil {
+		return fmt.Errorf("failed to list filtered routes: %v", err)
+	}
+	if len(nlRoutes) == 0 {
+		return rm.netlinkAddRoute(link, gwIP, subnet, mtu, src)
+	}
+	netlinkRoute := &nlRoutes[0]
+	if netlinkRoute.MTU != mtu || !src.Equal(netlinkRoute.Src) || !gwIP.Equal(netlinkRoute.Gw) {
+		netlinkRoute.MTU = mtu
+		netlinkRoute.Src = src
+		netlinkRoute.Gw = gwIP
+		err = netlink.RouteReplace(netlinkRoute)
+		if err != nil {
+			return fmt.Errorf("failed to replace route for subnet %s via gateway %s with mtu %d: %v",
+				subnet.String(), gwIP.String(), mtu, err)
+		}
+	}
+	return nil
+}
+
+func (rm *routeManager) netlinkAddRoute(link netlink.Link, gwIP net.IP, subnet *net.IPNet, mtu int, srcIP net.IP) error {
+	newNlRoute := &netlink.Route{
+		Dst:       subnet,
+		LinkIndex: link.Attrs().Index,
+		Scope:     netlink.SCOPE_UNIVERSE,
+		Gw:        gwIP,
+	}
+	if len(srcIP) > 0 {
+		newNlRoute.Src = srcIP
+	}
+	if mtu != 0 {
+		newNlRoute.MTU = mtu
+	}
+	err := netlink.RouteAdd(newNlRoute)
+	if err != nil {
+		return fmt.Errorf("failed to add route (%s): %v", newNlRoute.String(), err)
+	}
+	return nil
+}
+
+func (rm *routeManager) netlinkDelRoute(link netlink.Link, subnet *net.IPNet) error {
+	// List routes for the link in the default routing table
+	nlRoutes, err := netlink.RouteList(link, netlink.FAMILY_ALL)
+	if err != nil {
+		return fmt.Errorf("failed to get routes for link %s: %v", link.Attrs().Name, err)
+	}
+	for _, nlRoute := range nlRoutes {
+		deleteRoute := false
+		// Delete if subnet is nil and netlink route dst is nil or if they are equal
+		if subnet == nil {
+			deleteRoute = nlRoute.Dst == nil
+		} else if nlRoute.Dst != nil {
+			deleteRoute = nlRoute.Dst.String() == subnet.String()
+		}
+
+		if deleteRoute {
+			err = netlink.RouteDel(&nlRoute)
+			if err != nil {
+				net := "default"
+				if nlRoute.Dst != nil {
+					net = nlRoute.Dst.String()
+				}
+				return fmt.Errorf("failed to delete route '%s via %s' for link %s: %v",
+					net, nlRoute.Gw.String(), link.Attrs().Name, err)
+			}
+			break
+		}
+	}
+	return nil
+}
+
+func (rm *routeManager) addRoutesPerLinkStore(rl routesPerLink) error {
+	infName, err := rl.getLinkName()
+	if err != nil {
+		return fmt.Errorf("failed to add route for link (%s) to store because we could not get link name", rl.String())
+	}
+	managedRl, ok := rm.store[infName]
+	if !ok {
+		rm.store[infName] = rl
+		return nil
+	}
+	newRoutes := make([]route, 0)
+	for _, newRoute := range rl.routes {
+		var found bool
+		for _, managedRoute := range managedRl.routes {
+			if managedRoute.equal(newRoute) {
+				found = true
+				break
+			}
+		}
+		if !found {
+			newRoutes = append(newRoutes, newRoute)
+		}
+	}
+	if len(newRoutes) == 0 {
+		klog.Infof("Route Manager: nothing to process for new route for link as it is already managed: %s", rl.String())
+		return nil
+	}
+	managedRl.routes = append(managedRl.routes, newRoutes...)
+	rm.store[infName] = managedRl
+	return nil
+}
+
+// sync will iterate through all the routes seen on a node for the links we manage and ensure that any
+// routes managed by the route manager are applied. Any additional routes for a link are preserved.
+// sync only inspects routes for links which we manage and ignores routes for non-managed links.
+func (rm *routeManager) sync() {
+	for infName, rl := range rm.store {
+		activeNlRoutes, err := netlink.RouteList(rl.link, nl.FAMILY_ALL)
+		if err != nil {
+			klog.Errorf("Route Manager: failed to list routes for link %q: %v", infName, err)
+			continue
+		}
+		var activeRoutes []route
+		for _, activeNlRoute := range activeNlRoutes {
+			activeRoute, err := convertNetlinkRouteToRoutesPerLink(activeNlRoute)
+			if err != nil {
+				klog.Errorf("Route Manager: failed to convert netlink route (%s) to route: %v",
+					activeNlRoute.String(), err)
+				continue
+			}
+			activeRoutes = append(activeRoutes, activeRoute.routes...)
+		}
+		var syncNeeded bool
+		var syncReason string
+		for _, expectedRoute := range rl.routes {
+			var found bool
+			for _, activeRoute := range activeRoutes {
+				if activeRoute.equal(expectedRoute) {
+					found = true
+				}
+			}
+			if !found {
+				syncReason = fmt.Sprintf("failed to find route: %s", expectedRoute.string())
+				syncNeeded = true
+				break
+			}
+		}
+		if syncNeeded {
+			klog.Infof("Route Manager: sync required for routes associated with link %q. Reason: %s", infName, syncReason)
+			if err = rm.applyRoutesPerLink(rl); err != nil {
+				klog.Errorf("Route Manager: sync failed to apply route (%s): %v", rl.String(), err)
+			}
+		}
+	}
+}
+
+type routesPerLink struct {
+	link   netlink.Link
+	routes []route
+}
+
+func (rl routesPerLink) validate() error {
+	if rl.link == nil || rl.link.Attrs() == nil || rl.link.Attrs().Name == "" {
+		return fmt.Errorf("link must be valid")
+	}
+	if len(rl.routes) == 0 {
+		return fmt.Errorf("route must have at least one route entry")
+	}
+	for _, r := range rl.routes {
+		if r.subnet == nil || r.subnet.String() == "" {
+			return fmt.Errorf("invalid subnet for route entry")
+		}
+	}
+	return nil
+}
+
+func (rl routesPerLink) getLinkName() (string, error) {
+	if rl.link == nil || rl.link.Attrs() == nil || rl.link.Attrs().Name == "" {
+		return "", fmt.Errorf("unable to get link name from: '%+v'", rl.link)
+	}
+	return rl.link.Attrs().Name, nil
+}
+
+func (rl routesPerLink) String() string {
+	var routes string
+	for i, r := range rl.routes {
+		routes = fmt.Sprintf("%s Route %d: %q", routes, i+1, r.string())
+	}
+	return fmt.Sprintf("Route(s) for link name: %q, with %d routes: %s", rl.link.Attrs().Name, len(rl.routes), routes)
+}
+
+func (rl *routesPerLink) delRoutes(delRoutes []route) {
+	if len(delRoutes) == 0 {
+		return
+	}
+	routes := make([]route, 0)
+	for _, existingRoute := range rl.routes {
+		var found bool
+		for _, delRoute := range delRoutes {
+			if existingRoute.equal(delRoute) {
+				found = true
+			}
+		}
+		if !found {
+			routes = append(routes, existingRoute)
+		}
+	}
+	rl.routes = routes
+}
+
+type route struct {
+	gwIP   net.IP
+	subnet *net.IPNet
+	mtu    int
+	srcIP  net.IP
+}
+
+func (r route) equal(r2 route) bool {
+	if r.mtu != r2.mtu {
+		return false
+	}
+	if r.subnet.String() != r2.subnet.String() {
+		return false
+	}
+	if r.gwIP.String() != r2.gwIP.String() {
+		return false
+	}
+	if r.srcIP.String() != r2.srcIP.String() {
+		return false
+	}
+	return true
+}
+
+func (r route) string() string {
+	var s string
+	if r.subnet != nil {
+		s = fmt.Sprintf("Subnet: %s", r.subnet.String())
+	}
+	if r.mtu != 0 {
+		s = fmt.Sprintf("%s MTU: %d ", s, r.mtu)
+	}
+	if len(r.srcIP) > 0 {
+		s = fmt.Sprintf("%s Source IP: %q ", s, r.srcIP.String())
+	}
+	if len(r.gwIP) > 0 {
+		s = fmt.Sprintf("%s Gateway IP: %q", s, r.gwIP.String())
+	}
+	return s
+}
+
+func convertRouteUpdateToRoutesPerLink(ru netlink.RouteUpdate) (routesPerLink, error) {
+	link, err := netlink.LinkByIndex(ru.LinkIndex)
+	if err != nil {
+		return routesPerLink{}, fmt.Errorf("failed to get link by index from route update: %v", ru)
+	}
+
+	return routesPerLink{
+		link: link,
+		routes: []route{
+			{
+				gwIP:   ru.Gw,
+				subnet: ru.Dst,
+				mtu:    ru.MTU,
+				srcIP:  ru.Src,
+			},
+		},
+	}, nil
+}
+
+func convertNetlinkRouteToRoutesPerLink(nlRoute netlink.Route) (routesPerLink, error) {
+	link, err := netlink.LinkByIndex(nlRoute.LinkIndex)
+	if err != nil {
+		return routesPerLink{}, fmt.Errorf("failed to get link by index (%d) from route (%s): %w", nlRoute.LinkIndex,
+			nlRoute.String(), err)
+	}
+	return routesPerLink{
+		link: link,
+		routes: []route{
+			{
+				gwIP:   nlRoute.Gw,
+				subnet: nlRoute.Dst,
+				mtu:    nlRoute.MTU,
+				srcIP:  nlRoute.Src,
+			},
+		},
+	}, nil
+}
+
+func getNetlinkIPFamily(ip net.IP) int {
+	if utilnet.IsIPv6(ip) {
+		return netlink.FAMILY_V6
+	} else {
+		return netlink.FAMILY_V4
+	}
+}
+
+func filterRouteByDst(link netlink.Link, subnet *net.IPNet) (*netlink.Route, uint64) {
+	return &netlink.Route{
+		Dst:       subnet,
+		LinkIndex: link.Attrs().Index,
+	},
+
netlink.RT_FILTER_DST | netlink.RT_FILTER_OIF +} + +func subscribeNetlinkRouteEvents(stopCh <-chan struct{}) (bool, chan netlink.RouteUpdate) { + routeEventCh := make(chan netlink.RouteUpdate, 20) + if err := netlink.RouteSubscribe(routeEventCh, stopCh); err != nil { + klog.Errorf("Route Manager: failed to subscribe to netlink route events: %v", err) + return false, routeEventCh + } + return true, routeEventCh +} diff --git a/go-controller/pkg/node/route_manager_test.go b/go-controller/pkg/node/route_manager_test.go new file mode 100644 index 0000000000..7c40f9fb5d --- /dev/null +++ b/go-controller/pkg/node/route_manager_test.go @@ -0,0 +1,284 @@ +package node + +import ( + "net" + "os" + "runtime" + "sync" + "time" + + "github.com/containernetworking/plugins/pkg/ns" + "github.com/containernetworking/plugins/pkg/testutils" + "github.com/onsi/ginkgo" + "github.com/onsi/gomega" + "github.com/vishvananda/netlink" +) + +var _ = ginkgo.Describe("Route Manager", func() { + var rm *routeManager + var stopCh chan struct{} + var wg *sync.WaitGroup + var testNS ns.NetNS + var loLink netlink.Link + loMTU := 65520 + loAlternativeMTU := 9000 + loLinkName := "lo" + loSubnet := &net.IPNet{ + IP: net.IPv4(127, 1, 0, 0), + Mask: net.CIDRMask(24, 32), + } + altSubnet := &net.IPNet{ + IP: net.IPv4(10, 10, 0, 0), + Mask: net.CIDRMask(24, 32), + } + loIP := net.IPv4(127, 1, 1, 1) + loIPDiff := net.IPv4(127, 1, 1, 2) + loGWIP := net.IPv4(127, 1, 1, 254) + if os.Getuid() != 0 { + ginkgo.Skip("Test requires root privileges") + } + + ginkgo.BeforeEach(func() { + var err error + runtime.LockOSThread() + testNS, err = testutils.NewNS() + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + + wg = &sync.WaitGroup{} + stopCh = make(chan struct{}) + wg.Add(1) + syncPeriod := 10 * time.Millisecond + logAllActivity := true + rm = newRouteManager(wg, logAllActivity, syncPeriod) + err = testNS.Do(func(netNS ns.NetNS) error { + defer ginkgo.GinkgoRecover() + loLink, err = netlink.LinkByName(loLinkName) + if err != nil { + return err + } + if err := netlink.LinkSetUp(loLink); err != nil { + return err + } + + loAddr := &netlink.Addr{ + IPNet: loSubnet, + } + if err := netlink.AddrAdd(loLink, loAddr); err != nil { + return err + } + route := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: loSubnet, Src: loIP} + if err := netlink.RouteAdd(&route); err != nil { + return err + } + return nil + }) + + go testNS.Do(func(netNS ns.NetNS) error { + defer ginkgo.GinkgoRecover() + rm.run(stopCh) + return nil + }) + }) + + ginkgo.AfterEach(func() { + defer runtime.UnlockOSThread() + close(stopCh) + wg.Wait() + gomega.Expect(testNS.Close()).ShouldNot(gomega.HaveOccurred()) + gomega.Expect(testutils.UnmountNS(testNS)).To(gomega.Succeed()) + }) + + ginkgo.Context("del route", func() { + ginkgo.It("del route", func() { + r := route{nil, altSubnet, 0, nil} + rl := routesPerLink{loLink, []route{r}} + rm.add(rl) + gomega.Eventually(func() bool { + return doesRouteEntryExist(testNS, loLink, r) + }, time.Second).Should(gomega.BeTrue()) + rm.del(rl) + gomega.Eventually(func() bool { + return doesRouteEntryExist(testNS, loLink, r) + }, time.Second).Should(gomega.BeFalse()) + }) + + }) + + ginkgo.Context("add route", func() { + ginkgo.It("applies route with subnet, gateway IP, src IP, MTU", func() { + r := route{loGWIP, loSubnet, loMTU, loIP} + rl := routesPerLink{loLink, []route{r}} + rm.add(rl) + gomega.Eventually(func() bool { + return doesRouteEntryExist(testNS, loLink, r) + }, time.Second).Should(gomega.BeTrue()) + }) + + 
ginkgo.It("applies route with subnets, gateway IP, src IP", func() { + r := route{loGWIP, loSubnet, 0, loIP} + rl := routesPerLink{loLink, []route{r}} + rm.add(rl) + gomega.Eventually(func() bool { + return doesRouteEntryExist(testNS, loLink, r) + }, time.Second).Should(gomega.BeTrue()) + }) + + ginkgo.It("applies route with subnets, gateway IP", func() { + r := route{loGWIP, loSubnet, 0, nil} + rl := routesPerLink{loLink, []route{r}} + rm.add(rl) + gomega.Eventually(func() bool { + return doesRouteEntryExist(testNS, loLink, r) + }, time.Second).Should(gomega.BeTrue()) + }) + + ginkgo.It("applies route with subnets", func() { + r := route{nil, loSubnet, 0, nil} + rl := routesPerLink{loLink, []route{r}} + rm.add(rl) + gomega.Eventually(func() bool { + return doesRouteEntryExist(testNS, loLink, r) + }, time.Second).Should(gomega.BeTrue()) + }) + + ginkgo.It("route exists, has different mtu and is updated", func() { + // route already exists for default mtu - no need to add it + r := route{nil, loSubnet, loAlternativeMTU, nil} + rl := routesPerLink{loLink, []route{r}} + rm.add(rl) + gomega.Eventually(func() bool { + return doesRouteEntryExist(testNS, loLink, r) + }, time.Second).Should(gomega.BeTrue()) + }) + + ginkgo.It("route exists, has different src and is updated", func() { + // route already exists for src ip - no need to add it + r := route{nil, loSubnet, 0, loIPDiff} + rl := routesPerLink{loLink, []route{r}} + rm.add(rl) + gomega.Eventually(func() bool { + return doesRouteEntryExist(testNS, loLink, r) + }, time.Second).Should(gomega.BeTrue()) + }) + }) + + ginkgo.Context("runtime sync", func() { + ginkgo.It("reapplies managed route that was removed (gw IP, mtu, src IP)", func() { + r := route{loGWIP, loSubnet, loMTU, loIP} + rm.add(routesPerLink{loLink, []route{r}}) + gomega.Eventually(func() bool { + return doesRouteEntryExist(testNS, loLink, r) + }, time.Second).Should(gomega.BeTrue()) + // clear routes and wait for sync to reapply + routeList, err := getRouteList(testNS, loLink, netlink.FAMILY_ALL) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + gomega.Expect(len(routeList)).Should(gomega.BeNumerically(">", 0)) + gomega.Expect(deleteRoutes(testNS, routeList...)).ShouldNot(gomega.HaveOccurred()) + // wait for sync to activate since managed routes have been deleted + gomega.Eventually(func() bool { + return doesRouteEntryExist(testNS, loLink, r) + }, time.Second).Should(gomega.BeTrue()) + }) + + ginkgo.It("reapplies managed route that was removed (mtu, src IP)", func() { + r := route{nil, loSubnet, loMTU, loIP} + rm.add(routesPerLink{loLink, []route{r}}) + gomega.Eventually(func() bool { + return doesRouteEntryExist(testNS, loLink, r) + }, time.Second).Should(gomega.BeTrue()) + // clear routes and wait for sync to reapply + routeList, err := getRouteList(testNS, loLink, netlink.FAMILY_ALL) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + gomega.Expect(len(routeList)).Should(gomega.BeNumerically(">", 0)) + gomega.Expect(deleteRoutes(testNS, routeList...)).ShouldNot(gomega.HaveOccurred()) + // wait for sync to activate since managed routes have been deleted + gomega.Eventually(func() bool { + return doesRouteEntryExist(testNS, loLink, r) + }, time.Second).Should(gomega.BeTrue()) + }) + + ginkgo.It("reapplies managed route that was removed because link is down", func() { + r := route{nil, loSubnet, loMTU, loIP} + rm.add(routesPerLink{loLink, []route{r}}) + gomega.Eventually(func() bool { + return doesRouteEntryExist(testNS, loLink, r) + }, 
time.Second).Should(gomega.BeTrue()) + gomega.Expect(setLinkDown(testNS, loLink)).ShouldNot(gomega.HaveOccurred()) + gomega.Eventually(func() bool { + return doesRouteEntryExist(testNS, loLink, r) + }, time.Second).Should(gomega.BeFalse()) + gomega.Expect(setLinkUp(testNS, loLink)).ShouldNot(gomega.HaveOccurred()) + gomega.Eventually(func() bool { + return doesRouteEntryExist(testNS, loLink, r) + }, time.Second).Should(gomega.BeTrue()) + }) + }) +}) + +func doesRouteEntryExist(targetNs ns.NetNS, link netlink.Link, reCandidate route) bool { + nlRoutesFound, err := getRouteList(targetNs, link, netlink.FAMILY_ALL) + if err != nil { + return false + } + + for _, nlRouteFound := range nlRoutesFound { + routeFound, err := convertNetlinkRouteToRoutesPerLink(nlRouteFound) + if err != nil { + return false + } + if len(routeFound.routes) == 0 { + return false + } + r := routeFound.routes[0] // always only one RE + if r.equal(reCandidate) { + return true + } + } + return false +} + +func getRouteList(targetNs ns.NetNS, link netlink.Link, ipFamily int) ([]netlink.Route, error) { + nlRoutesFound := make([]netlink.Route, 0) + var err error + err = targetNs.Do(func(netNS ns.NetNS) error { + nlRoutesFound, err = netlink.RouteList(link, ipFamily) + if err != nil { + return err + } + return nil + }) + return nlRoutesFound, err +} + +func deleteRoutes(targetNs ns.NetNS, nlRoutes ...netlink.Route) error { + var err error + err = targetNs.Do(func(netNS ns.NetNS) error { + for _, nlRoute := range nlRoutes { + if err = netlink.RouteDel(&nlRoute); err != nil { + return err + } + } + return nil + }) + return err +} + +func setLinkUp(targetNS ns.NetNS, link netlink.Link) error { + return setLink(targetNS, link, netlink.LinkSetUp) +} + +func setLinkDown(targetNS ns.NetNS, link netlink.Link) error { + return setLink(targetNS, link, netlink.LinkSetDown) +} +func setLink(targetNS ns.NetNS, link netlink.Link, nlFunc func(link2 netlink.Link) error) error { + err := targetNS.Do(func(netNS ns.NetNS) error { + if err := nlFunc(link); err != nil { + return err + } + return nil + }) + if err != nil { + return err + } + return nil +} diff --git a/go-controller/pkg/util/net_linux.go b/go-controller/pkg/util/net_linux.go index 44ee3e31e5..207dd34aa7 100644 --- a/go-controller/pkg/util/net_linux.go +++ b/go-controller/pkg/util/net_linux.go @@ -308,35 +308,6 @@ func LinkRoutesAdd(link netlink.Link, gwIP net.IP, subnets []*net.IPNet, mtu int return nil } -// LinkRoutesApply applies routes for given subnets. -// For each subnet it searches for an existing route by destination(subnet) on link: -// * if found and gwIP, mtu or src changed the route will be updated -// * if not found it adds a new route -func LinkRoutesApply(link netlink.Link, gwIP net.IP, subnets []*net.IPNet, mtu int, src net.IP) error { - for _, subnet := range subnets { - route, err := LinkRouteGetFilteredRoute(filterRouteByDst(link, subnet)) - if err != nil { - return err - } - if route != nil { - if route.MTU != mtu || !src.Equal(route.Src) || !gwIP.Equal(route.Gw) { - route.MTU = mtu - route.Src = src - route.Gw = gwIP - - err = netLinkOps.RouteReplace(route) - if err != nil { - return fmt.Errorf("failed to replace route for subnet %s via gateway %s with mtu %d: %v", - subnet.String(), gwIP.String(), mtu, err) - } - } - } else { - return LinkRoutesAdd(link, gwIP, []*net.IPNet{subnet}, mtu, src) - } - } - return nil -} - // LinkRouteGetFilteredRoute gets a route for the given route filter. 
// returns nil if route is not found func LinkRouteGetFilteredRoute(routeFilter *netlink.Route, filterMask uint64) (*netlink.Route, error) { @@ -571,14 +542,6 @@ func GetIFNameAndMTUForAddress(ifAddress net.IP) (string, int, error) { return "", 0, fmt.Errorf("couldn't not find a link associated with the given OVN Encap IP (%s)", ifAddress) } -func filterRouteByDst(link netlink.Link, subnet *net.IPNet) (*netlink.Route, uint64) { - return &netlink.Route{ - Dst: subnet, - LinkIndex: link.Attrs().Index, - }, - netlink.RT_FILTER_DST | netlink.RT_FILTER_OIF -} - func filterRouteByDstAndGw(link netlink.Link, subnet *net.IPNet, gw net.IP) (*netlink.Route, uint64) { return &netlink.Route{ Dst: subnet, diff --git a/go-controller/pkg/util/net_linux_unit_test.go b/go-controller/pkg/util/net_linux_unit_test.go index e23c4658b2..ef4c8ee842 100644 --- a/go-controller/pkg/util/net_linux_unit_test.go +++ b/go-controller/pkg/util/net_linux_unit_test.go @@ -540,182 +540,6 @@ func TestLinkRoutesAdd(t *testing.T) { } } -func TestLinkRoutesAddOrUpdateMTU(t *testing.T) { - mockNetLinkOps := new(mocks.NetLinkOps) - mockLink := new(netlink_mocks.Link) - // below is defined in net_linux.go - netLinkOps = mockNetLinkOps - - tests := []struct { - desc string - inputLink netlink.Link - inputGwIP net.IP - inputSubnets []*net.IPNet - inputMTU int - inputSrc net.IP - errExp bool - onRetArgsNetLinkLibOpers []ovntest.TestifyMockHelper - onRetArgsLinkIfaceOpers []ovntest.TestifyMockHelper - }{ - { - desc: "Route get fails", - inputLink: mockLink, - inputGwIP: ovntest.MustParseIP("192.168.0.1"), - inputSubnets: ovntest.MustParseIPNets("10.18.20.0/24"), - errExp: true, - onRetArgsNetLinkLibOpers: []ovntest.TestifyMockHelper{ - {OnCallMethodName: "RouteListFiltered", OnCallMethodArgType: []string{"int", "*netlink.Route", "uint64"}, RetArgList: []interface{}{[]netlink.Route{}, fmt.Errorf("mock error")}}, - }, - onRetArgsLinkIfaceOpers: []ovntest.TestifyMockHelper{ - {OnCallMethodName: "Attrs", OnCallMethodArgType: []string{}, RetArgList: []interface{}{&netlink.LinkAttrs{Name: "testIfaceName", Index: 1}}}, - }, - }, - { - desc: "Route does not exist and is added", - inputLink: mockLink, - inputGwIP: ovntest.MustParseIP("192.168.0.1"), - inputSubnets: ovntest.MustParseIPNets("10.18.20.0/24"), - errExp: false, - onRetArgsNetLinkLibOpers: []ovntest.TestifyMockHelper{ - {OnCallMethodName: "RouteListFiltered", OnCallMethodArgType: []string{"int", "*netlink.Route", "uint64"}, RetArgList: []interface{}{[]netlink.Route{}, nil}}, - {OnCallMethodName: "RouteAdd", OnCallMethodArgType: []string{"*netlink.Route"}, RetArgList: []interface{}{nil}}, - }, - onRetArgsLinkIfaceOpers: []ovntest.TestifyMockHelper{ - {OnCallMethodName: "Attrs", OnCallMethodArgType: []string{}, RetArgList: []interface{}{&netlink.LinkAttrs{Name: "testIfaceName", Index: 1}}}, - {OnCallMethodName: "Attrs", OnCallMethodArgType: []string{}, RetArgList: []interface{}{&netlink.LinkAttrs{Name: "testIfaceName", Index: 1}}}, - }, - }, - { - desc: "Route exists, has the same mtu and is not updated", - inputLink: mockLink, - inputGwIP: ovntest.MustParseIP("192.168.0.1"), - inputSubnets: ovntest.MustParseIPNets("10.18.20.0/24"), - inputMTU: 1400, - errExp: false, - onRetArgsNetLinkLibOpers: []ovntest.TestifyMockHelper{ - {OnCallMethodName: "RouteListFiltered", OnCallMethodArgType: []string{"int", "*netlink.Route", "uint64"}, RetArgList: []interface{}{[]netlink.Route{ - { - Gw: ovntest.MustParseIP("192.168.0.1"), - Dst: ovntest.MustParseIPNet("10.18.20.0/24"), - MTU: 1400, - }, - 
}, nil}}, - }, - onRetArgsLinkIfaceOpers: []ovntest.TestifyMockHelper{ - {OnCallMethodName: "Attrs", OnCallMethodArgType: []string{}, RetArgList: []interface{}{&netlink.LinkAttrs{Name: "testIfaceName", Index: 1}}}, - }, - }, - { - desc: "Route exists, has different mtu and is updated", - inputLink: mockLink, - inputGwIP: ovntest.MustParseIP("192.168.0.1"), - inputSubnets: ovntest.MustParseIPNets("10.18.20.0/24"), - inputMTU: 1400, - errExp: false, - onRetArgsNetLinkLibOpers: []ovntest.TestifyMockHelper{ - {OnCallMethodName: "RouteListFiltered", OnCallMethodArgType: []string{"int", "*netlink.Route", "uint64"}, RetArgList: []interface{}{[]netlink.Route{ - { - Gw: ovntest.MustParseIP("192.168.0.1"), - Dst: ovntest.MustParseIPNet("10.18.20.0/24"), - }, - }, nil}}, - {OnCallMethodName: "RouteReplace", OnCallMethodArgType: []string{"*netlink.Route"}, RetArgList: []interface{}{nil}}, - }, - onRetArgsLinkIfaceOpers: []ovntest.TestifyMockHelper{ - {OnCallMethodName: "Attrs", OnCallMethodArgType: []string{}, RetArgList: []interface{}{&netlink.LinkAttrs{Name: "testIfaceName", Index: 1}}}, - }, - }, - { - desc: "Route exists, has different source and is updated", - inputLink: mockLink, - inputGwIP: nil, - inputSubnets: ovntest.MustParseIPNets("10.18.20.0/24"), - inputMTU: 1400, - inputSrc: ovntest.MustParseIP("192.168.0.8"), - errExp: false, - onRetArgsNetLinkLibOpers: []ovntest.TestifyMockHelper{ - {OnCallMethodName: "RouteListFiltered", OnCallMethodArgType: []string{"int", "*netlink.Route", "uint64"}, RetArgList: []interface{}{[]netlink.Route{ - { - Gw: nil, - Dst: ovntest.MustParseIPNet("10.18.20.0/24"), - MTU: 1400, - Src: ovntest.MustParseIP("192.168.0.10"), - }, - }, nil}}, - {OnCallMethodName: "RouteReplace", OnCallMethodArgType: []string{"*netlink.Route"}, RetArgList: []interface{}{nil}}, - }, - onRetArgsLinkIfaceOpers: []ovntest.TestifyMockHelper{ - {OnCallMethodName: "Attrs", OnCallMethodArgType: []string{}, RetArgList: []interface{}{&netlink.LinkAttrs{Name: "testIfaceName", Index: 1}}}, - }, - }, - { - desc: "Route exists, has different gw and is updated", - inputLink: mockLink, - inputGwIP: ovntest.MustParseIP("192.168.0.1"), - inputSubnets: ovntest.MustParseIPNets("10.18.20.0/24"), - inputMTU: 1400, - inputSrc: ovntest.MustParseIP("192.168.0.8"), - errExp: false, - onRetArgsNetLinkLibOpers: []ovntest.TestifyMockHelper{ - {OnCallMethodName: "RouteListFiltered", OnCallMethodArgType: []string{"int", "*netlink.Route", "uint64"}, RetArgList: []interface{}{[]netlink.Route{ - { - Gw: ovntest.MustParseIP("192.168.0.2"), - Dst: ovntest.MustParseIPNet("10.18.20.0/24"), - MTU: 1400, - Src: ovntest.MustParseIP("192.168.0.8"), - }, - }, nil}}, - {OnCallMethodName: "RouteReplace", OnCallMethodArgType: []string{"*netlink.Route"}, RetArgList: []interface{}{nil}}, - }, - onRetArgsLinkIfaceOpers: []ovntest.TestifyMockHelper{ - {OnCallMethodName: "Attrs", OnCallMethodArgType: []string{}, RetArgList: []interface{}{&netlink.LinkAttrs{Name: "testIfaceName", Index: 1}}}, - }, - }, - { - desc: "Route exists, has the same (mtu, source and gw) and is not updated", - inputLink: mockLink, - inputGwIP: ovntest.MustParseIP("192.168.0.1"), - inputSubnets: ovntest.MustParseIPNets("10.18.20.0/24"), - inputMTU: 1400, - inputSrc: ovntest.MustParseIP("192.168.0.10"), - errExp: false, - onRetArgsNetLinkLibOpers: []ovntest.TestifyMockHelper{ - {OnCallMethodName: "RouteListFiltered", OnCallMethodArgType: []string{"int", "*netlink.Route", "uint64"}, RetArgList: []interface{}{[]netlink.Route{ - { - Gw: 
ovntest.MustParseIP("192.168.0.1"), - Dst: ovntest.MustParseIPNet("10.18.20.0/24"), - MTU: 1400, - Src: ovntest.MustParseIP("192.168.0.10"), - }, - }, nil}}, - }, - onRetArgsLinkIfaceOpers: []ovntest.TestifyMockHelper{ - {OnCallMethodName: "Attrs", OnCallMethodArgType: []string{}, RetArgList: []interface{}{&netlink.LinkAttrs{Name: "testIfaceName", Index: 1}}}, - }, - }, - { - desc: "LinkRoutesApply() returns NO error when subnets input list is empty", - }, - } - for i, tc := range tests { - t.Run(fmt.Sprintf("%d:%s", i, tc.desc), func(t *testing.T) { - - ovntest.ProcessMockFnList(&mockNetLinkOps.Mock, tc.onRetArgsNetLinkLibOpers) - ovntest.ProcessMockFnList(&mockLink.Mock, tc.onRetArgsLinkIfaceOpers) - - err := LinkRoutesApply(tc.inputLink, tc.inputGwIP, tc.inputSubnets, tc.inputMTU, tc.inputSrc) - t.Log(err) - if tc.errExp { - assert.Error(t, err) - } else { - assert.Nil(t, err) - } - mockNetLinkOps.AssertExpectations(t) - mockLink.AssertExpectations(t) - }) - } -} - func TestLinkRouteExists(t *testing.T) { mockNetLinkOps := new(mocks.NetLinkOps) mockLink := new(netlink_mocks.Link) From c62fa306f5f0685ca0853c100f9f2559cf0840e6 Mon Sep 17 00:00:00 2001 From: Periyasamy Palanisamy Date: Mon, 24 Apr 2023 11:35:28 +0200 Subject: [PATCH 24/90] Fix stale SNAT entries for completed pods In case of completed pod case, egress ip module is handling pod add, egressip events as usual and when logical port is found, it proceeds with programming SNAT and LRP for pod ip address which is not required. Also when pod is done or completed, then it's not required to recreate SNAT entry referring to node ip address. Hence this commit addresses these two issues so that no stale or duplicate SNAT entries present for a pod. Signed-off-by: Periyasamy Palanisamy --- go-controller/pkg/ovn/egressip.go | 9 +- go-controller/pkg/ovn/egressip_test.go | 390 +++++++++++++++++++++++++ go-controller/pkg/ovn/namespace.go | 2 +- go-controller/pkg/util/kube.go | 6 + 4 files changed, 404 insertions(+), 3 deletions(-) diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index 5afc3fd775..99c7232c3a 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -1089,6 +1089,12 @@ func (oc *DefaultNetworkController) addPodEgressIPAssignmentsWithLock(name strin // work on ovnkube-master restarts when all egress IP handlers will most likely // match and perform the setup for the same pod and status multiple times over. func (oc *DefaultNetworkController) addPodEgressIPAssignments(name string, statusAssignments []egressipv1.EgressIPStatusItem, pod *kapi.Pod) error { + podKey := getPodKey(pod) + // If pod is already in succeeded or failed state, return it without proceeding further. + if util.PodCompleted(pod) { + klog.Infof("Pod %s is already in completed state, skipping egress ip assignment", podKey) + return nil + } // If statusAssignments is empty just return, not doing this will delete the // external GW set up, even though there might be no egress IP set up to // perform. @@ -1096,7 +1102,6 @@ func (oc *DefaultNetworkController) addPodEgressIPAssignments(name string, statu return nil } var remainingAssignments []egressipv1.EgressIPStatusItem - podKey := getPodKey(pod) // Retrieve the pod's networking configuration from the // logicalPortCache. 
The reason for doing this: a) only normal network // pods are placed in this cache, b) once the pod is placed here we know @@ -2374,7 +2379,7 @@ func (e *egressIPController) addExternalGWPodSNAT(podNamespace, podName string, // external GW setup in those cases. func (e *egressIPController) addExternalGWPodSNATOps(ops []ovsdb.Operation, podNamespace, podName string, status egressipv1.EgressIPStatusItem) ([]ovsdb.Operation, error) { if config.Gateway.DisableSNATMultipleGWs { - if pod, err := e.watchFactory.GetPod(podNamespace, podName); err == nil && pod.Spec.NodeName == status.Node { + if pod, err := e.watchFactory.GetPod(podNamespace, podName); err == nil && pod.Spec.NodeName == status.Node && util.PodNeedsSNAT(pod) { // if the pod still exists, add snats to->nodeIP (on the node where the pod exists) for these podIPs after deleting the snat to->egressIP // NOTE: This needs to be done only if the pod was on the same node as egressNode extIPs, err := getExternalIPsGR(e.watchFactory, pod.Spec.NodeName) diff --git a/go-controller/pkg/ovn/egressip_test.go b/go-controller/pkg/ovn/egressip_test.go index 9f52de645a..991400d653 100644 --- a/go-controller/pkg/ovn/egressip_test.go +++ b/go-controller/pkg/ovn/egressip_test.go @@ -5872,6 +5872,396 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) + ginkgo.It("ensure egress ip entries are not created when pod is already moved into completed state", func() { + app.Action = func(ctx *cli.Context) error { + config.Gateway.DisableSNATMultipleGWs = true + egressIP := "192.168.126.25" + node1IPv4 := "192.168.126.12/24" + + egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) + egressPod.Status.Phase = kapi.PodSucceeded + egressNamespace := newNamespace(namespace) + + node1 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node1Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, + "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", + }, + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + + node1Switch := &nbdb.LogicalSwitch{ + UUID: node1.Name + "-UUID", + Name: node1.Name, + } + node1GR := &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + } + node1LSP := &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + } + 
fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + node1GR, + node1LSP, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, + }, + node1Switch, + }, + }, + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1}, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{*egressNamespace}, + }, + &v1.PodList{ + Items: []v1.Pod{egressPod}, + }, + ) + i, n, _ := net.ParseCIDR(podV4IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + + err := fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + egressPodPortInfo, err := fakeOvn.controller.logicalPortCache.get(&egressPod, types.DefaultNetworkName) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ePod, err := fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Get(context.TODO(), egressPod.Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + egressPodIP, err := util.GetPodIPsOfNetwork(ePod, &util.DefaultNetInfo{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + egressNetPodIP, _, err := net.ParseCIDR(egressPodPortInfo.ips[0].String()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(egressNetPodIP.String()).To(gomega.Equal(egressPodIP[0].String())) + + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) + gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + node1LSP.Options = map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + } + expectedDatabaseStatewithPod := []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", + }, &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, + }, node1GR, node1LSP, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + 
node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, + }, node1Switch} + + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStatewithPod)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("ensure external gw pod snat entry is not created back when pod is moved into completed state", func() { + app.Action = func(ctx *cli.Context) error { + config.Gateway.DisableSNATMultipleGWs = true + egressIP := "192.168.126.25" + node1IPv4 := "192.168.126.12/24" + + egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) + egressNamespace := newNamespace(namespace) + + node1 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node1Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, + "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", + }, + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + + node1Switch := &nbdb.LogicalSwitch{ + UUID: node1.Name + "-UUID", + Name: node1.Name, + } + node1GR := &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + } + node1LSP := &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + } + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + node1GR, + node1LSP, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, + }, + node1Switch, + }, + }, + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1}, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{*egressNamespace}, + }, + &v1.PodList{ + Items: []v1.Pod{egressPod}, + }, + ) + + i, n, _ := net.ParseCIDR(podV4IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + + err := fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPNamespaces() + 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + egressPodPortInfo, err := fakeOvn.controller.logicalPortCache.get(&egressPod, types.DefaultNetworkName) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ePod, err := fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Get(context.TODO(), egressPod.Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + egressPodIP, err := util.GetPodIPsOfNetwork(ePod, &util.DefaultNetInfo{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + egressNetPodIP, _, err := net.ParseCIDR(egressPodPortInfo.ips[0].String()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(egressNetPodIP.String()).To(gomega.Equal(egressPodIP[0].String())) + gomega.Expect(egressPodPortInfo.expires.IsZero()).To(gomega.BeTrue()) + + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) + gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + podEIPSNAT := &nbdb.NAT{ + UUID: "egressip-nat-UUID1", + LogicalIP: podV4IP, + ExternalIP: egressIP, + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: utilpointer.StringPtr("k8s-node1"), + Options: map[string]string{ + "stateless": "false", + }, + } + podReRoutePolicy := &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", egressPodIP[0].String()), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: nodeLogicalRouterIPv4, + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + UUID: "reroute-UUID1", + } + node1GR.Nat = []string{"egressip-nat-UUID1"} + node1LSP.Options = map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + } + expectedDatabaseStatewithPod := []libovsdbtest.TestData{ + podEIPSNAT, &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", + }, &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, podReRoutePolicy, &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1"}, + }, node1GR, node1LSP, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, + }, node1Switch} + + 
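+ // While the pod is Running, the NB database is expected to contain both the + // egress IP SNAT (podEIPSNAT) and the pod's reroute policy (podReRoutePolicy) built above.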
gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStatewithPod)) + + egressPod.Status.Phase = kapi.PodSucceeded + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Update(context.TODO(), &egressPod, metav1.UpdateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + // Wait for the pod to move into the Succeeded state. + gomega.Eventually(func() v1.PodPhase { + egressPod1, _ := fakeOvn.watcher.GetPod(egressPod.Namespace, egressPod.Name) + return egressPod1.Status.Phase + }, 5).Should(gomega.Equal(kapi.PodSucceeded)) + + node1GR.Nat = []string{} + expectedDatabaseStatewithCompletedPod := []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", + }, &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, + }, node1GR, node1LSP, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, + }, node1Switch} + + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStatewithCompletedPod)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + ginkgo.It("should remove stale pod SNAT referring to wrong logical port after ovnkube-master is started", func() { app.Action = func(ctx *cli.Context) error { config.Gateway.DisableSNATMultipleGWs = true diff --git a/go-controller/pkg/ovn/namespace.go b/go-controller/pkg/ovn/namespace.go index 281b9cc20f..4548282ec5 100644 --- a/go-controller/pkg/ovn/namespace.go +++ b/go-controller/pkg/ovn/namespace.go @@ -216,7 +216,7 @@ func (oc *DefaultNetworkController) updateNamespace(old, newer *kapi.Namespace) errors = append(errors, fmt.Errorf("failed to get all the pods (%v)", err)) } for _, pod := range existingPods { - if !oc.isPodScheduledinLocalZone(pod) { + if !oc.isPodScheduledinLocalZone(pod) && !util.PodNeedsSNAT(pod) { continue } podAnnotation, err := util.UnmarshalPodAnnotation(pod.Annotations, types.DefaultNetworkName) diff --git a/go-controller/pkg/util/kube.go b/go-controller/pkg/util/kube.go index 5a1b5fd0ba..668acbb2cb 100644 --- a/go-controller/pkg/util/kube.go +++ b/go-controller/pkg/util/kube.go @@ -324,6 +324,12 @@ func GetNodePrimaryIP(node *kapi.Node) (string, error) { kapi.NodeInternalIP, kapi.NodeExternalIP) } +// PodNeedsSNAT returns true if the given pod is eligible to have an SNAT entry +// set up in OVN for its egress traffic outside the cluster; otherwise it returns false.
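+// Concretely: the pod must be scheduled on a node, must not use host networking +// (host-networked pods are not wired into OVN), and must not have completed, since +// pods in the Succeeded or Failed phase send no further traffic.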
+func PodNeedsSNAT(pod *kapi.Pod) bool { + return PodScheduled(pod) && !PodWantsHostNetwork(pod) && !PodCompleted(pod) +} + // PodWantsHostNetwork returns if the given pod is hostNetworked or not to determine if networking // needs to be setup func PodWantsHostNetwork(pod *kapi.Pod) bool { From 69c7ff662a9b150dae0ed37279a6f863f5a41642 Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Thu, 4 May 2023 10:42:36 +0200 Subject: [PATCH 25/90] multi-homing, policy, e2e: add requiresExtraNamespace attribute to pod config This makes the test code more explicit than it was before, and addresses the comment + promise for a short refactor done in pr #3531 comment [0]. [0] - https://github.com/ovn-org/ovn-kubernetes/pull/3531#discussion_r1177642261 Signed-off-by: Miguel Duarte Barroso --- test/e2e/multihoming.go | 36 ++++++++++++++++++----------------- test/e2e/multihoming_utils.go | 15 ++++++++------- 2 files changed, 27 insertions(+), 24 deletions(-) diff --git a/test/e2e/multihoming.go b/test/e2e/multihoming.go index 4173ce7e8f..166e80a1d5 100644 --- a/test/e2e/multihoming.go +++ b/test/e2e/multihoming.go @@ -651,16 +651,18 @@ var _ = Describe("Multi Homing", func() { table.DescribeTable( "multi-network policies configure traffic allow lists", func(netConfig networkAttachmentConfig, allowedClientPodConfig podConfiguration, blockedClientPodConfig podConfiguration, serverPodConfig podConfiguration, policy *mnpapi.MultiNetworkPolicy) { - if blockedClientPodConfig.namespace == "" { - blockedClientPodConfig.namespace = f.Namespace.Name - } else { - blockedClientPodConfig.namespace = extraNamespace.Name + blockedClientPodNamespace := f.Namespace.Name + if blockedClientPodConfig.requiresExtraNamespace { + blockedClientPodNamespace = extraNamespace.Name } - if allowedClientPodConfig.namespace == "" { - allowedClientPodConfig.namespace = f.Namespace.Name - } else { - allowedClientPodConfig.namespace = extraNamespace.Name + blockedClientPodConfig.namespace = blockedClientPodNamespace + + allowedClientPodNamespace := f.Namespace.Name + if allowedClientPodConfig.requiresExtraNamespace { + allowedClientPodNamespace = extraNamespace.Name } + allowedClientPodConfig.namespace = allowedClientPodNamespace + serverPodConfig.namespace = f.Namespace.Name for _, ns := range []v1.Namespace{*f.Namespace, *extraNamespace} { @@ -927,9 +929,9 @@ var _ = Describe("Multi Homing", func() { networkName: uniqueNadName("spans-multiple-namespaces"), }, podConfiguration{ - attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, - name: allowedClient(clientPodName), - namespace: "pepe", + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: allowedClient(clientPodName), + requiresExtraNamespace: true, }, podConfiguration{ attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, @@ -961,9 +963,9 @@ var _ = Describe("Multi Homing", func() { networkName: uniqueNadName("spans-multiple-namespaces"), }, podConfiguration{ - attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, - name: allowedClient(clientPodName), - namespace: "pepe", + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: allowedClient(clientPodName), + requiresExtraNamespace: true, }, podConfiguration{ attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, @@ -995,9 +997,9 @@ var _ = Describe("Multi Homing", func() { networkName: uniqueNadName("spans-multiple-namespaces"), }, podConfiguration{ - attachments: 
[]nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, - name: allowedClient(clientPodName), - namespace: "pepe", + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: allowedClient(clientPodName), + requiresExtraNamespace: true, }, podConfiguration{ attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, diff --git a/test/e2e/multihoming_utils.go b/test/e2e/multihoming_utils.go index 2b05c03e86..98bddbb5b4 100644 --- a/test/e2e/multihoming_utils.go +++ b/test/e2e/multihoming_utils.go @@ -90,13 +90,14 @@ func generateNetAttachDef(namespace, nadName, nadSpec string) *nadapi.NetworkAtt } type podConfiguration struct { - attachments []nadapi.NetworkSelectionElement - containerCmd []string - name string - namespace string - nodeSelector map[string]string - isPrivileged bool - labels map[string]string + attachments []nadapi.NetworkSelectionElement + containerCmd []string + name string + namespace string + nodeSelector map[string]string + isPrivileged bool + labels map[string]string + requiresExtraNamespace bool } func generatePodSpec(config podConfiguration) *v1.Pod { From 3a722c9d24d3ae870985ddd95bc45b289728e292 Mon Sep 17 00:00:00 2001 From: Riccardo Ravaioli Date: Thu, 27 Apr 2023 12:21:41 +0200 Subject: [PATCH 26/90] Filter out IP addresses added by keepalived When we set k8s.ovn.org/node-primary-ifaddr annotation on the node, we simply take the first valid IP address we find on the node gateway. We already exclude link-local addresses and those in internally reserved subnets. Let's also filter out addresses added by keepalived, which are now labeled with $iface:vip. Signed-off-by: Riccardo Ravaioli --- go-controller/pkg/util/net_linux.go | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/go-controller/pkg/util/net_linux.go b/go-controller/pkg/util/net_linux.go index 207dd34aa7..ddcda76d4b 100644 --- a/go-controller/pkg/util/net_linux.go +++ b/go-controller/pkg/util/net_linux.go @@ -8,6 +8,7 @@ import ( "fmt" "net" "reflect" + "strings" "time" "github.com/j-keck/arping" @@ -441,6 +442,7 @@ func DeleteConntrack(ip string, port int32, protocol kapi.Protocol, ipFilterType } // GetNetworkInterfaceIPs returns the IP addresses for the network interface 'iface'. +// We filter out addresses that are link local, reserved for internal use or added by keepalived. 
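+// For example, a virtual IP that keepalived adds on an interface named "ens3" +// would carry the address label "ens3:vip" and be skipped, while the interface's +// own addresses are returned as usual.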
func GetNetworkInterfaceIPs(iface string) ([]*net.IPNet, error) { link, err := netLinkOps.LinkByName(iface) if err != nil { @@ -454,7 +456,7 @@ func GetNetworkInterfaceIPs(iface string) ([]*net.IPNet, error) { var ips []*net.IPNet for _, addr := range addrs { - if addr.IP.IsLinkLocalUnicast() || IsAddressReservedForInternalUse(addr.IP) { + if addr.IP.IsLinkLocalUnicast() || IsAddressReservedForInternalUse(addr.IP) || IsAddressAddedByKeepAlived(addr) { continue } // Ignore addresses marked as secondary or deprecated since they may @@ -485,6 +487,13 @@ func IsAddressReservedForInternalUse(addr net.IP) bool { return subnet.Contains(addr) } +// IsAddressAddedByKeepAlived returns true if the input interface address obtained +// through netlink has a label that ends with ":vip", which is how keepalived +// marks the IP addresses it adds (https://github.com/openshift/machine-config-operator/pull/3683) +func IsAddressAddedByKeepAlived(addr netlink.Addr) bool { + return strings.HasSuffix(addr.Label, ":vip") +} + // GetIPv6OnSubnet when given an IPv6 address with a 128 prefix for an interface, // looks for possible broadest subnet on-link routes and returns the same address // with the found subnet prefix. Otherwise it returns the provided address unchanged. From 8ea3718e0735a7f40de516f4c6b7c94ad3cb90db Mon Sep 17 00:00:00 2001 From: Ori Braunshtein Date: Tue, 17 Jan 2023 09:28:57 +0200 Subject: [PATCH 27/90] Add EgressService CRD Signed-off-by: Ori Braunshtein --- .../v1/apis/clientset/versioned/clientset.go | 119 +++++++++ .../v1/apis/clientset/versioned/doc.go | 19 ++ .../versioned/fake/clientset_generated.go | 84 ++++++ .../v1/apis/clientset/versioned/fake/doc.go | 19 ++ .../apis/clientset/versioned/fake/register.go | 55 ++++ .../v1/apis/clientset/versioned/scheme/doc.go | 19 ++ .../clientset/versioned/scheme/register.go | 55 ++++ .../versioned/typed/egressservice/v1/doc.go | 19 ++ .../typed/egressservice/v1/egressservice.go | 194 ++++++++++++++ .../egressservice/v1/egressservice_client.go | 106 ++++++++ .../typed/egressservice/v1/fake/doc.go | 19 ++ .../v1/fake/fake_egressservice.go | 140 ++++++++++ .../v1/fake/fake_egressservice_client.go | 39 +++ .../egressservice/v1/generated_expansion.go | 20 ++ .../egressservice/interface.go | 45 ++++ .../egressservice/v1/egressservice.go | 89 +++++++ .../egressservice/v1/interface.go | 44 +++ .../informers/externalversions/factory.go | 250 ++++++++++++++++++ .../informers/externalversions/generic.go | 61 +++++ .../internalinterfaces/factory_interfaces.go | 39 +++ .../listers/egressservice/v1/egressservice.go | 98 +++++++ .../egressservice/v1/expansion_generated.go | 26 ++ go-controller/pkg/crd/egressservice/v1/doc.go | 4 + .../pkg/crd/egressservice/v1/register.go | 34 +++ .../pkg/crd/egressservice/v1/types.go | 70 +++++ .../egressservice/v1/zz_generated.deepcopy.go | 119 +++++++++ 26 files changed, 1786 insertions(+) create mode 100644 go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/clientset.go create mode 100644 go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/doc.go create mode 100644 go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/fake/clientset_generated.go create mode 100644 go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/fake/doc.go create mode 100644 go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/fake/register.go create mode 100644 go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/scheme/doc.go create mode 100644 
go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/scheme/register.go create mode 100644 go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1/doc.go create mode 100644 go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1/egressservice.go create mode 100644 go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1/egressservice_client.go create mode 100644 go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1/fake/doc.go create mode 100644 go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1/fake/fake_egressservice.go create mode 100644 go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1/fake/fake_egressservice_client.go create mode 100644 go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1/generated_expansion.go create mode 100644 go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/egressservice/interface.go create mode 100644 go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/egressservice/v1/egressservice.go create mode 100644 go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/egressservice/v1/interface.go create mode 100644 go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/factory.go create mode 100644 go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/generic.go create mode 100644 go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/internalinterfaces/factory_interfaces.go create mode 100644 go-controller/pkg/crd/egressservice/v1/apis/listers/egressservice/v1/egressservice.go create mode 100644 go-controller/pkg/crd/egressservice/v1/apis/listers/egressservice/v1/expansion_generated.go create mode 100644 go-controller/pkg/crd/egressservice/v1/doc.go create mode 100644 go-controller/pkg/crd/egressservice/v1/register.go create mode 100644 go-controller/pkg/crd/egressservice/v1/types.go create mode 100644 go-controller/pkg/crd/egressservice/v1/zz_generated.deepcopy.go diff --git a/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/clientset.go b/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/clientset.go new file mode 100644 index 0000000000..bebbd8be25 --- /dev/null +++ b/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/clientset.go @@ -0,0 +1,119 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. 
+ +package versioned + +import ( + "fmt" + "net/http" + + k8sv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1" + discovery "k8s.io/client-go/discovery" + rest "k8s.io/client-go/rest" + flowcontrol "k8s.io/client-go/util/flowcontrol" +) + +type Interface interface { + Discovery() discovery.DiscoveryInterface + K8sV1() k8sv1.K8sV1Interface +} + +// Clientset contains the clients for groups. +type Clientset struct { + *discovery.DiscoveryClient + k8sV1 *k8sv1.K8sV1Client +} + +// K8sV1 retrieves the K8sV1Client +func (c *Clientset) K8sV1() k8sv1.K8sV1Interface { + return c.k8sV1 +} + +// Discovery retrieves the DiscoveryClient +func (c *Clientset) Discovery() discovery.DiscoveryInterface { + if c == nil { + return nil + } + return c.DiscoveryClient +} + +// NewForConfig creates a new Clientset for the given config. +// If config's RateLimiter is not set and QPS and Burst are acceptable, +// NewForConfig will generate a rate-limiter in configShallowCopy. +// NewForConfig is equivalent to NewForConfigAndClient(c, httpClient), +// where httpClient was generated with rest.HTTPClientFor(c). +func NewForConfig(c *rest.Config) (*Clientset, error) { + configShallowCopy := *c + + if configShallowCopy.UserAgent == "" { + configShallowCopy.UserAgent = rest.DefaultKubernetesUserAgent() + } + + // share the transport between all clients + httpClient, err := rest.HTTPClientFor(&configShallowCopy) + if err != nil { + return nil, err + } + + return NewForConfigAndClient(&configShallowCopy, httpClient) +} + +// NewForConfigAndClient creates a new Clientset for the given config and http client. +// Note the http client provided takes precedence over the configured transport values. +// If config's RateLimiter is not set and QPS and Burst are acceptable, +// NewForConfigAndClient will generate a rate-limiter in configShallowCopy. +func NewForConfigAndClient(c *rest.Config, httpClient *http.Client) (*Clientset, error) { + configShallowCopy := *c + if configShallowCopy.RateLimiter == nil && configShallowCopy.QPS > 0 { + if configShallowCopy.Burst <= 0 { + return nil, fmt.Errorf("burst is required to be greater than 0 when RateLimiter is not set and QPS is set to greater than 0") + } + configShallowCopy.RateLimiter = flowcontrol.NewTokenBucketRateLimiter(configShallowCopy.QPS, configShallowCopy.Burst) + } + + var cs Clientset + var err error + cs.k8sV1, err = k8sv1.NewForConfigAndClient(&configShallowCopy, httpClient) + if err != nil { + return nil, err + } + + cs.DiscoveryClient, err = discovery.NewDiscoveryClientForConfigAndClient(&configShallowCopy, httpClient) + if err != nil { + return nil, err + } + return &cs, nil +} + +// NewForConfigOrDie creates a new Clientset for the given config and +// panics if there is an error in the config. +func NewForConfigOrDie(c *rest.Config) *Clientset { + cs, err := NewForConfig(c) + if err != nil { + panic(err) + } + return cs +} + +// New creates a new Clientset for the given RESTClient. 
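+// Most callers construct the clientset from a *rest.Config via NewForConfig +// instead. An illustrative sketch (the namespace and resource name are placeholders): +// +// cs, err := NewForConfig(cfg) // cfg is a *rest.Config +// ... +// es, err := cs.K8sV1().EgressServices("default").Get(ctx, "my-egress-service", metav1.GetOptions{})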
+func New(c rest.Interface) *Clientset { + var cs Clientset + cs.k8sV1 = k8sv1.New(c) + + cs.DiscoveryClient = discovery.NewDiscoveryClient(c) + return &cs +} diff --git a/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/doc.go b/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/doc.go new file mode 100644 index 0000000000..518bc288b3 --- /dev/null +++ b/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/doc.go @@ -0,0 +1,19 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +// This package has the automatically generated clientset. +package versioned diff --git a/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/fake/clientset_generated.go b/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/fake/clientset_generated.go new file mode 100644 index 0000000000..fcc43cee87 --- /dev/null +++ b/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/fake/clientset_generated.go @@ -0,0 +1,84 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +package fake + +import ( + clientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned" + k8sv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1" + fakek8sv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1/fake" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/watch" + "k8s.io/client-go/discovery" + fakediscovery "k8s.io/client-go/discovery/fake" + "k8s.io/client-go/testing" +) + +// NewSimpleClientset returns a clientset that will respond with the provided objects. +// It's backed by a very simple object tracker that processes creates, updates and deletions as-is, +// without applying any validations and/or defaults. It shouldn't be considered a replacement +// for a real clientset and is mostly useful in simple unit tests. 
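+// +// An illustrative sketch for a unit test (the object fields are placeholders and +// egressservicev1 is an assumed alias for the generated EgressService types package): +// +// cs := NewSimpleClientset(&egressservicev1.EgressService{ +// ObjectMeta: metav1.ObjectMeta{Name: "svc", Namespace: "default"}, +// }) +// es, err := cs.K8sV1().EgressServices("default").Get(ctx, "svc", metav1.GetOptions{})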
+func NewSimpleClientset(objects ...runtime.Object) *Clientset { + o := testing.NewObjectTracker(scheme, codecs.UniversalDecoder()) + for _, obj := range objects { + if err := o.Add(obj); err != nil { + panic(err) + } + } + + cs := &Clientset{tracker: o} + cs.discovery = &fakediscovery.FakeDiscovery{Fake: &cs.Fake} + cs.AddReactor("*", "*", testing.ObjectReaction(o)) + cs.AddWatchReactor("*", func(action testing.Action) (handled bool, ret watch.Interface, err error) { + gvr := action.GetResource() + ns := action.GetNamespace() + watch, err := o.Watch(gvr, ns) + if err != nil { + return false, nil, err + } + return true, watch, nil + }) + + return cs +} + +// Clientset implements clientset.Interface. Meant to be embedded into a +// struct to get a default implementation. This makes faking out just the method +// you want to test easier. +type Clientset struct { + testing.Fake + discovery *fakediscovery.FakeDiscovery + tracker testing.ObjectTracker +} + +func (c *Clientset) Discovery() discovery.DiscoveryInterface { + return c.discovery +} + +func (c *Clientset) Tracker() testing.ObjectTracker { + return c.tracker +} + +var ( + _ clientset.Interface = &Clientset{} + _ testing.FakeClient = &Clientset{} +) + +// K8sV1 retrieves the K8sV1Client +func (c *Clientset) K8sV1() k8sv1.K8sV1Interface { + return &fakek8sv1.FakeK8sV1{Fake: &c.Fake} +} diff --git a/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/fake/doc.go b/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/fake/doc.go new file mode 100644 index 0000000000..19e0028ffb --- /dev/null +++ b/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/fake/doc.go @@ -0,0 +1,19 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +// This package has the automatically generated fake clientset. +package fake diff --git a/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/fake/register.go b/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/fake/register.go new file mode 100644 index 0000000000..d01ce43301 --- /dev/null +++ b/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/fake/register.go @@ -0,0 +1,55 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. 
+ +package fake + +import ( + k8sv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + schema "k8s.io/apimachinery/pkg/runtime/schema" + serializer "k8s.io/apimachinery/pkg/runtime/serializer" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" +) + +var scheme = runtime.NewScheme() +var codecs = serializer.NewCodecFactory(scheme) + +var localSchemeBuilder = runtime.SchemeBuilder{ + k8sv1.AddToScheme, +} + +// AddToScheme adds all types of this clientset into the given scheme. This allows composition +// of clientsets, like in: +// +// import ( +// "k8s.io/client-go/kubernetes" +// clientsetscheme "k8s.io/client-go/kubernetes/scheme" +// aggregatorclientsetscheme "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/scheme" +// ) +// +// kclientset, _ := kubernetes.NewForConfig(c) +// _ = aggregatorclientsetscheme.AddToScheme(clientsetscheme.Scheme) +// +// After this, RawExtensions in Kubernetes types will serialize kube-aggregator types +// correctly. +var AddToScheme = localSchemeBuilder.AddToScheme + +func init() { + v1.AddToGroupVersion(scheme, schema.GroupVersion{Version: "v1"}) + utilruntime.Must(AddToScheme(scheme)) +} diff --git a/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/scheme/doc.go b/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/scheme/doc.go new file mode 100644 index 0000000000..1aec4021fc --- /dev/null +++ b/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/scheme/doc.go @@ -0,0 +1,19 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +// This package contains the scheme of the automatically generated clientset. +package scheme diff --git a/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/scheme/register.go b/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/scheme/register.go new file mode 100644 index 0000000000..b90e803d70 --- /dev/null +++ b/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/scheme/register.go @@ -0,0 +1,55 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. 
+ +package scheme + +import ( + k8sv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + schema "k8s.io/apimachinery/pkg/runtime/schema" + serializer "k8s.io/apimachinery/pkg/runtime/serializer" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" +) + +var Scheme = runtime.NewScheme() +var Codecs = serializer.NewCodecFactory(Scheme) +var ParameterCodec = runtime.NewParameterCodec(Scheme) +var localSchemeBuilder = runtime.SchemeBuilder{ + k8sv1.AddToScheme, +} + +// AddToScheme adds all types of this clientset into the given scheme. This allows composition +// of clientsets, like in: +// +// import ( +// "k8s.io/client-go/kubernetes" +// clientsetscheme "k8s.io/client-go/kubernetes/scheme" +// aggregatorclientsetscheme "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/scheme" +// ) +// +// kclientset, _ := kubernetes.NewForConfig(c) +// _ = aggregatorclientsetscheme.AddToScheme(clientsetscheme.Scheme) +// +// After this, RawExtensions in Kubernetes types will serialize kube-aggregator types +// correctly. +var AddToScheme = localSchemeBuilder.AddToScheme + +func init() { + v1.AddToGroupVersion(Scheme, schema.GroupVersion{Version: "v1"}) + utilruntime.Must(AddToScheme(Scheme)) +} diff --git a/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1/doc.go b/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1/doc.go new file mode 100644 index 0000000000..b22b05acdb --- /dev/null +++ b/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1/doc.go @@ -0,0 +1,19 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +// This package has the automatically generated typed clients. +package v1 diff --git a/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1/egressservice.go b/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1/egressservice.go new file mode 100644 index 0000000000..9bd850281a --- /dev/null +++ b/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1/egressservice.go @@ -0,0 +1,194 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. 
+ +package v1 + +import ( + "context" + "time" + + v1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1" + scheme "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/scheme" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + types "k8s.io/apimachinery/pkg/types" + watch "k8s.io/apimachinery/pkg/watch" + rest "k8s.io/client-go/rest" +) + +// EgressServicesGetter has a method to return a EgressServiceInterface. +// A group's client should implement this interface. +type EgressServicesGetter interface { + EgressServices(namespace string) EgressServiceInterface +} + +// EgressServiceInterface has methods to work with EgressService resources. +type EgressServiceInterface interface { + Create(ctx context.Context, egressService *v1.EgressService, opts metav1.CreateOptions) (*v1.EgressService, error) + Update(ctx context.Context, egressService *v1.EgressService, opts metav1.UpdateOptions) (*v1.EgressService, error) + UpdateStatus(ctx context.Context, egressService *v1.EgressService, opts metav1.UpdateOptions) (*v1.EgressService, error) + Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error + DeleteCollection(ctx context.Context, opts metav1.DeleteOptions, listOpts metav1.ListOptions) error + Get(ctx context.Context, name string, opts metav1.GetOptions) (*v1.EgressService, error) + List(ctx context.Context, opts metav1.ListOptions) (*v1.EgressServiceList, error) + Watch(ctx context.Context, opts metav1.ListOptions) (watch.Interface, error) + Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts metav1.PatchOptions, subresources ...string) (result *v1.EgressService, err error) + EgressServiceExpansion +} + +// egressServices implements EgressServiceInterface +type egressServices struct { + client rest.Interface + ns string +} + +// newEgressServices returns a EgressServices +func newEgressServices(c *K8sV1Client, namespace string) *egressServices { + return &egressServices{ + client: c.RESTClient(), + ns: namespace, + } +} + +// Get takes name of the egressService, and returns the corresponding egressService object, and an error if there is any. +func (c *egressServices) Get(ctx context.Context, name string, options metav1.GetOptions) (result *v1.EgressService, err error) { + result = &v1.EgressService{} + err = c.client.Get(). + Namespace(c.ns). + Resource("egressservices"). + Name(name). + VersionedParams(&options, scheme.ParameterCodec). + Do(ctx). + Into(result) + return +} + +// List takes label and field selectors, and returns the list of EgressServices that match those selectors. +func (c *egressServices) List(ctx context.Context, opts metav1.ListOptions) (result *v1.EgressServiceList, err error) { + var timeout time.Duration + if opts.TimeoutSeconds != nil { + timeout = time.Duration(*opts.TimeoutSeconds) * time.Second + } + result = &v1.EgressServiceList{} + err = c.client.Get(). + Namespace(c.ns). + Resource("egressservices"). + VersionedParams(&opts, scheme.ParameterCodec). + Timeout(timeout). + Do(ctx). + Into(result) + return +} + +// Watch returns a watch.Interface that watches the requested egressServices. +func (c *egressServices) Watch(ctx context.Context, opts metav1.ListOptions) (watch.Interface, error) { + var timeout time.Duration + if opts.TimeoutSeconds != nil { + timeout = time.Duration(*opts.TimeoutSeconds) * time.Second + } + opts.Watch = true + return c.client.Get(). + Namespace(c.ns). + Resource("egressservices"). 
+ VersionedParams(&opts, scheme.ParameterCodec). + Timeout(timeout). + Watch(ctx) +} + +// Create takes the representation of a egressService and creates it. Returns the server's representation of the egressService, and an error, if there is any. +func (c *egressServices) Create(ctx context.Context, egressService *v1.EgressService, opts metav1.CreateOptions) (result *v1.EgressService, err error) { + result = &v1.EgressService{} + err = c.client.Post(). + Namespace(c.ns). + Resource("egressservices"). + VersionedParams(&opts, scheme.ParameterCodec). + Body(egressService). + Do(ctx). + Into(result) + return +} + +// Update takes the representation of a egressService and updates it. Returns the server's representation of the egressService, and an error, if there is any. +func (c *egressServices) Update(ctx context.Context, egressService *v1.EgressService, opts metav1.UpdateOptions) (result *v1.EgressService, err error) { + result = &v1.EgressService{} + err = c.client.Put(). + Namespace(c.ns). + Resource("egressservices"). + Name(egressService.Name). + VersionedParams(&opts, scheme.ParameterCodec). + Body(egressService). + Do(ctx). + Into(result) + return +} + +// UpdateStatus was generated because the type contains a Status member. +// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). +func (c *egressServices) UpdateStatus(ctx context.Context, egressService *v1.EgressService, opts metav1.UpdateOptions) (result *v1.EgressService, err error) { + result = &v1.EgressService{} + err = c.client.Put(). + Namespace(c.ns). + Resource("egressservices"). + Name(egressService.Name). + SubResource("status"). + VersionedParams(&opts, scheme.ParameterCodec). + Body(egressService). + Do(ctx). + Into(result) + return +} + +// Delete takes name of the egressService and deletes it. Returns an error if one occurs. +func (c *egressServices) Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error { + return c.client.Delete(). + Namespace(c.ns). + Resource("egressservices"). + Name(name). + Body(&opts). + Do(ctx). + Error() +} + +// DeleteCollection deletes a collection of objects. +func (c *egressServices) DeleteCollection(ctx context.Context, opts metav1.DeleteOptions, listOpts metav1.ListOptions) error { + var timeout time.Duration + if listOpts.TimeoutSeconds != nil { + timeout = time.Duration(*listOpts.TimeoutSeconds) * time.Second + } + return c.client.Delete(). + Namespace(c.ns). + Resource("egressservices"). + VersionedParams(&listOpts, scheme.ParameterCodec). + Timeout(timeout). + Body(&opts). + Do(ctx). + Error() +} + +// Patch applies the patch and returns the patched egressService. +func (c *egressServices) Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts metav1.PatchOptions, subresources ...string) (result *v1.EgressService, err error) { + result = &v1.EgressService{} + err = c.client.Patch(pt). + Namespace(c.ns). + Resource("egressservices"). + Name(name). + SubResource(subresources...). + VersionedParams(&opts, scheme.ParameterCodec). + Body(data). + Do(ctx). 
+ Into(result) + return +} diff --git a/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1/egressservice_client.go b/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1/egressservice_client.go new file mode 100644 index 0000000000..3bbe956349 --- /dev/null +++ b/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1/egressservice_client.go @@ -0,0 +1,106 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +package v1 + +import ( + "net/http" + + v1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/scheme" + rest "k8s.io/client-go/rest" +) + +type K8sV1Interface interface { + RESTClient() rest.Interface + EgressServicesGetter +} + +// K8sV1Client is used to interact with features provided by the k8s.ovn.org group. +type K8sV1Client struct { + restClient rest.Interface +} + +func (c *K8sV1Client) EgressServices(namespace string) EgressServiceInterface { + return newEgressServices(c, namespace) +} + +// NewForConfig creates a new K8sV1Client for the given config. +// NewForConfig is equivalent to NewForConfigAndClient(c, httpClient), +// where httpClient was generated with rest.HTTPClientFor(c). +func NewForConfig(c *rest.Config) (*K8sV1Client, error) { + config := *c + if err := setConfigDefaults(&config); err != nil { + return nil, err + } + httpClient, err := rest.HTTPClientFor(&config) + if err != nil { + return nil, err + } + return NewForConfigAndClient(&config, httpClient) +} + +// NewForConfigAndClient creates a new K8sV1Client for the given config and http client. +// Note the http client provided takes precedence over the configured transport values. +func NewForConfigAndClient(c *rest.Config, h *http.Client) (*K8sV1Client, error) { + config := *c + if err := setConfigDefaults(&config); err != nil { + return nil, err + } + client, err := rest.RESTClientForConfigAndClient(&config, h) + if err != nil { + return nil, err + } + return &K8sV1Client{client}, nil +} + +// NewForConfigOrDie creates a new K8sV1Client for the given config and +// panics if there is an error in the config. +func NewForConfigOrDie(c *rest.Config) *K8sV1Client { + client, err := NewForConfig(c) + if err != nil { + panic(err) + } + return client +} + +// New creates a new K8sV1Client for the given RESTClient. +func New(c rest.Interface) *K8sV1Client { + return &K8sV1Client{c} +} + +func setConfigDefaults(config *rest.Config) error { + gv := v1.SchemeGroupVersion + config.GroupVersion = &gv + config.APIPath = "/apis" + config.NegotiatedSerializer = scheme.Codecs.WithoutConversion() + + if config.UserAgent == "" { + config.UserAgent = rest.DefaultKubernetesUserAgent() + } + + return nil +} + +// RESTClient returns a RESTClient that is used to communicate +// with API server by this client implementation. 
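+// It is safe to call RESTClient on a nil *K8sV1Client; it simply returns nil.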
+func (c *K8sV1Client) RESTClient() rest.Interface { + if c == nil { + return nil + } + return c.restClient +} diff --git a/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1/fake/doc.go b/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1/fake/doc.go new file mode 100644 index 0000000000..422564f2d5 --- /dev/null +++ b/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1/fake/doc.go @@ -0,0 +1,19 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +// Package fake has the automatically generated clients. +package fake diff --git a/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1/fake/fake_egressservice.go b/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1/fake/fake_egressservice.go new file mode 100644 index 0000000000..59d84ae664 --- /dev/null +++ b/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1/fake/fake_egressservice.go @@ -0,0 +1,140 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +package fake + +import ( + "context" + + v1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + labels "k8s.io/apimachinery/pkg/labels" + types "k8s.io/apimachinery/pkg/types" + watch "k8s.io/apimachinery/pkg/watch" + testing "k8s.io/client-go/testing" +) + +// FakeEgressServices implements EgressServiceInterface +type FakeEgressServices struct { + Fake *FakeK8sV1 + ns string +} + +var egressservicesResource = v1.SchemeGroupVersion.WithResource("egressservices") + +var egressservicesKind = v1.SchemeGroupVersion.WithKind("EgressService") + +// Get takes name of the egressService, and returns the corresponding egressService object, and an error if there is any. +func (c *FakeEgressServices) Get(ctx context.Context, name string, options metav1.GetOptions) (result *v1.EgressService, err error) { + obj, err := c.Fake. + Invokes(testing.NewGetAction(egressservicesResource, c.ns, name), &v1.EgressService{}) + + if obj == nil { + return nil, err + } + return obj.(*v1.EgressService), err +} + +// List takes label and field selectors, and returns the list of EgressServices that match those selectors. +func (c *FakeEgressServices) List(ctx context.Context, opts metav1.ListOptions) (result *v1.EgressServiceList, err error) { + obj, err := c.Fake. 
+ Invokes(testing.NewListAction(egressservicesResource, egressservicesKind, c.ns, opts), &v1.EgressServiceList{}) + + if obj == nil { + return nil, err + } + + label, _, _ := testing.ExtractFromListOptions(opts) + if label == nil { + label = labels.Everything() + } + list := &v1.EgressServiceList{ListMeta: obj.(*v1.EgressServiceList).ListMeta} + for _, item := range obj.(*v1.EgressServiceList).Items { + if label.Matches(labels.Set(item.Labels)) { + list.Items = append(list.Items, item) + } + } + return list, err +} + +// Watch returns a watch.Interface that watches the requested egressServices. +func (c *FakeEgressServices) Watch(ctx context.Context, opts metav1.ListOptions) (watch.Interface, error) { + return c.Fake. + InvokesWatch(testing.NewWatchAction(egressservicesResource, c.ns, opts)) + +} + +// Create takes the representation of a egressService and creates it. Returns the server's representation of the egressService, and an error, if there is any. +func (c *FakeEgressServices) Create(ctx context.Context, egressService *v1.EgressService, opts metav1.CreateOptions) (result *v1.EgressService, err error) { + obj, err := c.Fake. + Invokes(testing.NewCreateAction(egressservicesResource, c.ns, egressService), &v1.EgressService{}) + + if obj == nil { + return nil, err + } + return obj.(*v1.EgressService), err +} + +// Update takes the representation of a egressService and updates it. Returns the server's representation of the egressService, and an error, if there is any. +func (c *FakeEgressServices) Update(ctx context.Context, egressService *v1.EgressService, opts metav1.UpdateOptions) (result *v1.EgressService, err error) { + obj, err := c.Fake. + Invokes(testing.NewUpdateAction(egressservicesResource, c.ns, egressService), &v1.EgressService{}) + + if obj == nil { + return nil, err + } + return obj.(*v1.EgressService), err +} + +// UpdateStatus was generated because the type contains a Status member. +// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). +func (c *FakeEgressServices) UpdateStatus(ctx context.Context, egressService *v1.EgressService, opts metav1.UpdateOptions) (*v1.EgressService, error) { + obj, err := c.Fake. + Invokes(testing.NewUpdateSubresourceAction(egressservicesResource, "status", c.ns, egressService), &v1.EgressService{}) + + if obj == nil { + return nil, err + } + return obj.(*v1.EgressService), err +} + +// Delete takes name of the egressService and deletes it. Returns an error if one occurs. +func (c *FakeEgressServices) Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error { + _, err := c.Fake. + Invokes(testing.NewDeleteActionWithOptions(egressservicesResource, c.ns, name, opts), &v1.EgressService{}) + + return err +} + +// DeleteCollection deletes a collection of objects. +func (c *FakeEgressServices) DeleteCollection(ctx context.Context, opts metav1.DeleteOptions, listOpts metav1.ListOptions) error { + action := testing.NewDeleteCollectionAction(egressservicesResource, c.ns, listOpts) + + _, err := c.Fake.Invokes(action, &v1.EgressServiceList{}) + return err +} + +// Patch applies the patch and returns the patched egressService. +func (c *FakeEgressServices) Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts metav1.PatchOptions, subresources ...string) (result *v1.EgressService, err error) { + obj, err := c.Fake. 
+ Invokes(testing.NewPatchSubresourceAction(egressservicesResource, c.ns, name, pt, data, subresources...), &v1.EgressService{}) + + if obj == nil { + return nil, err + } + return obj.(*v1.EgressService), err +} diff --git a/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1/fake/fake_egressservice_client.go b/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1/fake/fake_egressservice_client.go new file mode 100644 index 0000000000..7378a5ce6b --- /dev/null +++ b/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1/fake/fake_egressservice_client.go @@ -0,0 +1,39 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +package fake + +import ( + v1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1" + rest "k8s.io/client-go/rest" + testing "k8s.io/client-go/testing" +) + +type FakeK8sV1 struct { + *testing.Fake +} + +func (c *FakeK8sV1) EgressServices(namespace string) v1.EgressServiceInterface { + return &FakeEgressServices{c, namespace} +} + +// RESTClient returns a RESTClient that is used to communicate +// with API server by this client implementation. +func (c *FakeK8sV1) RESTClient() rest.Interface { + var ret *rest.RESTClient + return ret +} diff --git a/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1/generated_expansion.go b/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1/generated_expansion.go new file mode 100644 index 0000000000..789dfc3dc0 --- /dev/null +++ b/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/typed/egressservice/v1/generated_expansion.go @@ -0,0 +1,20 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +package v1 + +type EgressServiceExpansion interface{} diff --git a/go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/egressservice/interface.go b/go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/egressservice/interface.go new file mode 100644 index 0000000000..f434e82805 --- /dev/null +++ b/go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/egressservice/interface.go @@ -0,0 +1,45 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by informer-gen. DO NOT EDIT. + +package egressservice + +import ( + v1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/egressservice/v1" + internalinterfaces "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/internalinterfaces" +) + +// Interface provides access to each of this group's versions. +type Interface interface { + // V1 provides access to shared informers for resources in V1. + V1() v1.Interface +} + +type group struct { + factory internalinterfaces.SharedInformerFactory + namespace string + tweakListOptions internalinterfaces.TweakListOptionsFunc +} + +// New returns a new Interface. +func New(f internalinterfaces.SharedInformerFactory, namespace string, tweakListOptions internalinterfaces.TweakListOptionsFunc) Interface { + return &group{factory: f, namespace: namespace, tweakListOptions: tweakListOptions} +} + +// V1 returns a new v1.Interface. +func (g *group) V1() v1.Interface { + return v1.New(g.factory, g.namespace, g.tweakListOptions) +} diff --git a/go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/egressservice/v1/egressservice.go b/go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/egressservice/v1/egressservice.go new file mode 100644 index 0000000000..724c0220b1 --- /dev/null +++ b/go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/egressservice/v1/egressservice.go @@ -0,0 +1,89 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by informer-gen. DO NOT EDIT. + +package v1 + +import ( + "context" + time "time" + + egressservicev1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1" + versioned "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned" + internalinterfaces "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/internalinterfaces" + v1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/listers/egressservice/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + watch "k8s.io/apimachinery/pkg/watch" + cache "k8s.io/client-go/tools/cache" +) + +// EgressServiceInformer provides access to a shared informer and lister for +// EgressServices. 
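+// +// Informers are obtained through the generated shared informer factory and are +// started by it; event handlers are registered beforehand, e.g. (a sketch): +// +// inf.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ +// AddFunc: func(obj interface{}) { /* enqueue the EgressService */ }, +// })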
+type EgressServiceInformer interface { + Informer() cache.SharedIndexInformer + Lister() v1.EgressServiceLister +} + +type egressServiceInformer struct { + factory internalinterfaces.SharedInformerFactory + tweakListOptions internalinterfaces.TweakListOptionsFunc + namespace string +} + +// NewEgressServiceInformer constructs a new informer for EgressService type. +// Always prefer using an informer factory to get a shared informer instead of getting an independent +// one. This reduces memory footprint and number of connections to the server. +func NewEgressServiceInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers) cache.SharedIndexInformer { + return NewFilteredEgressServiceInformer(client, namespace, resyncPeriod, indexers, nil) +} + +// NewFilteredEgressServiceInformer constructs a new informer for EgressService type. +// Always prefer using an informer factory to get a shared informer instead of getting an independent +// one. This reduces memory footprint and number of connections to the server. +func NewFilteredEgressServiceInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers, tweakListOptions internalinterfaces.TweakListOptionsFunc) cache.SharedIndexInformer { + return cache.NewSharedIndexInformer( + &cache.ListWatch{ + ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.K8sV1().EgressServices(namespace).List(context.TODO(), options) + }, + WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.K8sV1().EgressServices(namespace).Watch(context.TODO(), options) + }, + }, + &egressservicev1.EgressService{}, + resyncPeriod, + indexers, + ) +} + +func (f *egressServiceInformer) defaultInformer(client versioned.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer { + return NewFilteredEgressServiceInformer(client, f.namespace, resyncPeriod, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, f.tweakListOptions) +} + +func (f *egressServiceInformer) Informer() cache.SharedIndexInformer { + return f.factory.InformerFor(&egressservicev1.EgressService{}, f.defaultInformer) +} + +func (f *egressServiceInformer) Lister() v1.EgressServiceLister { + return v1.NewEgressServiceLister(f.Informer().GetIndexer()) +} diff --git a/go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/egressservice/v1/interface.go b/go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/egressservice/v1/interface.go new file mode 100644 index 0000000000..1f88db4b63 --- /dev/null +++ b/go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/egressservice/v1/interface.go @@ -0,0 +1,44 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by informer-gen. DO NOT EDIT. 
+ +package v1 + +import ( + internalinterfaces "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/internalinterfaces" +) + +// Interface provides access to all the informers in this group version. +type Interface interface { + // EgressServices returns a EgressServiceInformer. + EgressServices() EgressServiceInformer +} + +type version struct { + factory internalinterfaces.SharedInformerFactory + namespace string + tweakListOptions internalinterfaces.TweakListOptionsFunc +} + +// New returns a new Interface. +func New(f internalinterfaces.SharedInformerFactory, namespace string, tweakListOptions internalinterfaces.TweakListOptionsFunc) Interface { + return &version{factory: f, namespace: namespace, tweakListOptions: tweakListOptions} +} + +// EgressServices returns a EgressServiceInformer. +func (v *version) EgressServices() EgressServiceInformer { + return &egressServiceInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions} +} diff --git a/go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/factory.go b/go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/factory.go new file mode 100644 index 0000000000..2aac8557d0 --- /dev/null +++ b/go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/factory.go @@ -0,0 +1,250 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by informer-gen. DO NOT EDIT. + +package externalversions + +import ( + reflect "reflect" + sync "sync" + time "time" + + versioned "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned" + egressservice "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/egressservice" + internalinterfaces "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/internalinterfaces" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + schema "k8s.io/apimachinery/pkg/runtime/schema" + cache "k8s.io/client-go/tools/cache" +) + +// SharedInformerOption defines the functional option type for SharedInformerFactory. +type SharedInformerOption func(*sharedInformerFactory) *sharedInformerFactory + +type sharedInformerFactory struct { + client versioned.Interface + namespace string + tweakListOptions internalinterfaces.TweakListOptionsFunc + lock sync.Mutex + defaultResync time.Duration + customResync map[reflect.Type]time.Duration + + informers map[reflect.Type]cache.SharedIndexInformer + // startedInformers is used for tracking which informers have been started. + // This allows Start() to be called multiple times safely. + startedInformers map[reflect.Type]bool + // wg tracks how many goroutines were started. + wg sync.WaitGroup + // shuttingDown is true when Shutdown has been called. It may still be running + // because it needs to wait for goroutines. 
+ shuttingDown bool +} + +// WithCustomResyncConfig sets a custom resync period for the specified informer types. +func WithCustomResyncConfig(resyncConfig map[v1.Object]time.Duration) SharedInformerOption { + return func(factory *sharedInformerFactory) *sharedInformerFactory { + for k, v := range resyncConfig { + factory.customResync[reflect.TypeOf(k)] = v + } + return factory + } +} + +// WithTweakListOptions sets a custom filter on all listers of the configured SharedInformerFactory. +func WithTweakListOptions(tweakListOptions internalinterfaces.TweakListOptionsFunc) SharedInformerOption { + return func(factory *sharedInformerFactory) *sharedInformerFactory { + factory.tweakListOptions = tweakListOptions + return factory + } +} + +// WithNamespace limits the SharedInformerFactory to the specified namespace. +func WithNamespace(namespace string) SharedInformerOption { + return func(factory *sharedInformerFactory) *sharedInformerFactory { + factory.namespace = namespace + return factory + } +} + +// NewSharedInformerFactory constructs a new instance of sharedInformerFactory for all namespaces. +func NewSharedInformerFactory(client versioned.Interface, defaultResync time.Duration) SharedInformerFactory { + return NewSharedInformerFactoryWithOptions(client, defaultResync) +} + +// NewFilteredSharedInformerFactory constructs a new instance of sharedInformerFactory. +// Listers obtained via this SharedInformerFactory will be subject to the same filters +// as specified here. +// Deprecated: Please use NewSharedInformerFactoryWithOptions instead +func NewFilteredSharedInformerFactory(client versioned.Interface, defaultResync time.Duration, namespace string, tweakListOptions internalinterfaces.TweakListOptionsFunc) SharedInformerFactory { + return NewSharedInformerFactoryWithOptions(client, defaultResync, WithNamespace(namespace), WithTweakListOptions(tweakListOptions)) +} + +// NewSharedInformerFactoryWithOptions constructs a new instance of a SharedInformerFactory with additional options. +func NewSharedInformerFactoryWithOptions(client versioned.Interface, defaultResync time.Duration, options ...SharedInformerOption) SharedInformerFactory { + factory := &sharedInformerFactory{ + client: client, + namespace: v1.NamespaceAll, + defaultResync: defaultResync, + informers: make(map[reflect.Type]cache.SharedIndexInformer), + startedInformers: make(map[reflect.Type]bool), + customResync: make(map[reflect.Type]time.Duration), + } + + // Apply all options + for _, opt := range options { + factory = opt(factory) + } + + return factory +} + +func (f *sharedInformerFactory) Start(stopCh <-chan struct{}) { + f.lock.Lock() + defer f.lock.Unlock() + + if f.shuttingDown { + return + } + + for informerType, informer := range f.informers { + if !f.startedInformers[informerType] { + f.wg.Add(1) + // We need a new variable in each loop iteration, + // otherwise the goroutine would use the loop variable + // and that keeps changing. + informer := informer + go func() { + defer f.wg.Done() + informer.Run(stopCh) + }() + f.startedInformers[informerType] = true + } + } +} + +func (f *sharedInformerFactory) Shutdown() { + f.lock.Lock() + f.shuttingDown = true + f.lock.Unlock() + + // Will return immediately if there is nothing to wait for. 
+ f.wg.Wait()
+}
+
+func (f *sharedInformerFactory) WaitForCacheSync(stopCh <-chan struct{}) map[reflect.Type]bool {
+ informers := func() map[reflect.Type]cache.SharedIndexInformer {
+ f.lock.Lock()
+ defer f.lock.Unlock()
+
+ informers := map[reflect.Type]cache.SharedIndexInformer{}
+ for informerType, informer := range f.informers {
+ if f.startedInformers[informerType] {
+ informers[informerType] = informer
+ }
+ }
+ return informers
+ }()
+
+ res := map[reflect.Type]bool{}
+ for informType, informer := range informers {
+ res[informType] = cache.WaitForCacheSync(stopCh, informer.HasSynced)
+ }
+ return res
+}
+
+// InternalInformerFor returns the SharedIndexInformer for obj using an internal
+// client.
+func (f *sharedInformerFactory) InformerFor(obj runtime.Object, newFunc internalinterfaces.NewInformerFunc) cache.SharedIndexInformer {
+ f.lock.Lock()
+ defer f.lock.Unlock()
+
+ informerType := reflect.TypeOf(obj)
+ informer, exists := f.informers[informerType]
+ if exists {
+ return informer
+ }
+
+ resyncPeriod, exists := f.customResync[informerType]
+ if !exists {
+ resyncPeriod = f.defaultResync
+ }
+
+ informer = newFunc(f.client, resyncPeriod)
+ f.informers[informerType] = informer
+
+ return informer
+}
+
+// SharedInformerFactory provides shared informers for resources in all known
+// API group versions.
+//
+// It is typically used like this:
+//
+// ctx, cancel := context.WithCancel(context.Background())
+// defer cancel()
+// factory := NewSharedInformerFactory(client, resyncPeriod)
+// defer factory.Shutdown() // Returns immediately if nothing was started.
+// genericInformer := factory.ForResource(resource)
+// typedInformer := factory.SomeAPIGroup().V1().SomeType()
+// factory.Start(ctx.Done()) // Start processing these informers.
+// synced := factory.WaitForCacheSync(ctx.Done())
+// for v, ok := range synced {
+// if !ok {
+// fmt.Fprintf(os.Stderr, "caches failed to sync: %v", v)
+// return
+// }
+// }
+//
+// // Informers can also be created after Start, but then
+// // Start must be called again:
+// anotherGenericInformer := factory.ForResource(resource)
+// factory.Start(ctx.Done())
+type SharedInformerFactory interface {
+ internalinterfaces.SharedInformerFactory
+
+ // Start initializes all requested informers. They are handled in goroutines
+ // which run until the stop channel gets closed.
+ Start(stopCh <-chan struct{})
+
+ // Shutdown marks a factory as shutting down. At that point no new
+ // informers can be started anymore and Start will return without
+ // doing anything.
+ //
+ // In addition, Shutdown blocks until all goroutines have terminated. For that
+ // to happen, the close channel(s) that they were started with must be closed,
+ // either before Shutdown gets called or while it is waiting.
+ //
+ // Shutdown may be called multiple times, even concurrently. All such calls will
+ // block until all goroutines have terminated.
+ Shutdown()
+
+ // WaitForCacheSync blocks until all started informers' caches were synced
+ // or the stop channel gets closed.
+ WaitForCacheSync(stopCh <-chan struct{}) map[reflect.Type]bool
+
+ // ForResource gives generic access to a shared informer of the matching type.
+ ForResource(resource schema.GroupVersionResource) (GenericInformer, error)
+
+ // InternalInformerFor returns the SharedIndexInformer for obj using an internal
+ // client.
+ InformerFor(obj runtime.Object, newFunc internalinterfaces.NewInformerFunc) cache.SharedIndexInformer + + K8s() egressservice.Interface +} + +func (f *sharedInformerFactory) K8s() egressservice.Interface { + return egressservice.New(f, f.namespace, f.tweakListOptions) +} diff --git a/go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/generic.go b/go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/generic.go new file mode 100644 index 0000000000..65015fb442 --- /dev/null +++ b/go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/generic.go @@ -0,0 +1,61 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by informer-gen. DO NOT EDIT. + +package externalversions + +import ( + "fmt" + + v1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1" + schema "k8s.io/apimachinery/pkg/runtime/schema" + cache "k8s.io/client-go/tools/cache" +) + +// GenericInformer is type of SharedIndexInformer which will locate and delegate to other +// sharedInformers based on type +type GenericInformer interface { + Informer() cache.SharedIndexInformer + Lister() cache.GenericLister +} + +type genericInformer struct { + informer cache.SharedIndexInformer + resource schema.GroupResource +} + +// Informer returns the SharedIndexInformer. +func (f *genericInformer) Informer() cache.SharedIndexInformer { + return f.informer +} + +// Lister returns the GenericLister. +func (f *genericInformer) Lister() cache.GenericLister { + return cache.NewGenericLister(f.Informer().GetIndexer(), f.resource) +} + +// ForResource gives generic access to a shared informer of the matching type +// TODO extend this to unknown resources with a client pool +func (f *sharedInformerFactory) ForResource(resource schema.GroupVersionResource) (GenericInformer, error) { + switch resource { + // Group=k8s.ovn.org, Version=v1 + case v1.SchemeGroupVersion.WithResource("egressservices"): + return &genericInformer{resource: resource.GroupResource(), informer: f.K8s().V1().EgressServices().Informer()}, nil + + } + + return nil, fmt.Errorf("no informer found for %v", resource) +} diff --git a/go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/internalinterfaces/factory_interfaces.go b/go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/internalinterfaces/factory_interfaces.go new file mode 100644 index 0000000000..5e2e38e859 --- /dev/null +++ b/go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/internalinterfaces/factory_interfaces.go @@ -0,0 +1,39 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by informer-gen. DO NOT EDIT. + +package internalinterfaces + +import ( + time "time" + + versioned "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + cache "k8s.io/client-go/tools/cache" +) + +// NewInformerFunc takes versioned.Interface and time.Duration to return a SharedIndexInformer. +type NewInformerFunc func(versioned.Interface, time.Duration) cache.SharedIndexInformer + +// SharedInformerFactory a small interface to allow for adding an informer without an import cycle +type SharedInformerFactory interface { + Start(stopCh <-chan struct{}) + InformerFor(obj runtime.Object, newFunc NewInformerFunc) cache.SharedIndexInformer +} + +// TweakListOptionsFunc is a function that transforms a v1.ListOptions. +type TweakListOptionsFunc func(*v1.ListOptions) diff --git a/go-controller/pkg/crd/egressservice/v1/apis/listers/egressservice/v1/egressservice.go b/go-controller/pkg/crd/egressservice/v1/apis/listers/egressservice/v1/egressservice.go new file mode 100644 index 0000000000..aba1b0c106 --- /dev/null +++ b/go-controller/pkg/crd/egressservice/v1/apis/listers/egressservice/v1/egressservice.go @@ -0,0 +1,98 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by lister-gen. DO NOT EDIT. + +package v1 + +import ( + v1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/client-go/tools/cache" +) + +// EgressServiceLister helps list EgressServices. +// All objects returned here must be treated as read-only. +type EgressServiceLister interface { + // List lists all EgressServices in the indexer. + // Objects returned here must be treated as read-only. + List(selector labels.Selector) (ret []*v1.EgressService, err error) + // EgressServices returns an object that can list and get EgressServices. + EgressServices(namespace string) EgressServiceNamespaceLister + EgressServiceListerExpansion +} + +// egressServiceLister implements the EgressServiceLister interface. +type egressServiceLister struct { + indexer cache.Indexer +} + +// NewEgressServiceLister returns a new EgressServiceLister. +func NewEgressServiceLister(indexer cache.Indexer) EgressServiceLister { + return &egressServiceLister{indexer: indexer} +} + +// List lists all EgressServices in the indexer. +func (s *egressServiceLister) List(selector labels.Selector) (ret []*v1.EgressService, err error) { + err = cache.ListAll(s.indexer, selector, func(m interface{}) { + ret = append(ret, m.(*v1.EgressService)) + }) + return ret, err +} + +// EgressServices returns an object that can list and get EgressServices. 
+func (s *egressServiceLister) EgressServices(namespace string) EgressServiceNamespaceLister { + return egressServiceNamespaceLister{indexer: s.indexer, namespace: namespace} +} + +// EgressServiceNamespaceLister helps list and get EgressServices. +// All objects returned here must be treated as read-only. +type EgressServiceNamespaceLister interface { + // List lists all EgressServices in the indexer for a given namespace. + // Objects returned here must be treated as read-only. + List(selector labels.Selector) (ret []*v1.EgressService, err error) + // Get retrieves the EgressService from the indexer for a given namespace and name. + // Objects returned here must be treated as read-only. + Get(name string) (*v1.EgressService, error) + EgressServiceNamespaceListerExpansion +} + +// egressServiceNamespaceLister implements the EgressServiceNamespaceLister +// interface. +type egressServiceNamespaceLister struct { + indexer cache.Indexer + namespace string +} + +// List lists all EgressServices in the indexer for a given namespace. +func (s egressServiceNamespaceLister) List(selector labels.Selector) (ret []*v1.EgressService, err error) { + err = cache.ListAllByNamespace(s.indexer, s.namespace, selector, func(m interface{}) { + ret = append(ret, m.(*v1.EgressService)) + }) + return ret, err +} + +// Get retrieves the EgressService from the indexer for a given namespace and name. +func (s egressServiceNamespaceLister) Get(name string) (*v1.EgressService, error) { + obj, exists, err := s.indexer.GetByKey(s.namespace + "/" + name) + if err != nil { + return nil, err + } + if !exists { + return nil, errors.NewNotFound(v1.Resource("egressservice"), name) + } + return obj.(*v1.EgressService), nil +} diff --git a/go-controller/pkg/crd/egressservice/v1/apis/listers/egressservice/v1/expansion_generated.go b/go-controller/pkg/crd/egressservice/v1/apis/listers/egressservice/v1/expansion_generated.go new file mode 100644 index 0000000000..9e7a40e22d --- /dev/null +++ b/go-controller/pkg/crd/egressservice/v1/apis/listers/egressservice/v1/expansion_generated.go @@ -0,0 +1,26 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by lister-gen. DO NOT EDIT. + +package v1 + +// EgressServiceListerExpansion allows custom methods to be added to +// EgressServiceLister. +type EgressServiceListerExpansion interface{} + +// EgressServiceNamespaceListerExpansion allows custom methods to be added to +// EgressServiceNamespaceLister. 
+type EgressServiceNamespaceListerExpansion interface{}
diff --git a/go-controller/pkg/crd/egressservice/v1/doc.go b/go-controller/pkg/crd/egressservice/v1/doc.go
new file mode 100644
index 0000000000..5703f91c44
--- /dev/null
+++ b/go-controller/pkg/crd/egressservice/v1/doc.go
@@ -0,0 +1,4 @@
+// Package v1 contains API Schema definitions for the egressservice v1 API group
+// +k8s:deepcopy-gen=package
+// +groupName=k8s.ovn.org
+package v1
diff --git a/go-controller/pkg/crd/egressservice/v1/register.go b/go-controller/pkg/crd/egressservice/v1/register.go
new file mode 100644
index 0000000000..6706793dcb
--- /dev/null
+++ b/go-controller/pkg/crd/egressservice/v1/register.go
@@ -0,0 +1,34 @@
+package v1
+
+import (
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/runtime"
+ "k8s.io/apimachinery/pkg/runtime/schema"
+)
+
+var (
+ GroupName = "k8s.ovn.org"
+ SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: "v1"}
+ SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes)
+ AddToScheme = SchemeBuilder.AddToScheme
+)
+
+// Kind takes an unqualified kind and returns back a Group qualified GroupKind
+func Kind(kind string) schema.GroupKind {
+ return SchemeGroupVersion.WithKind(kind).GroupKind()
+}
+
+// Resource takes an unqualified resource and returns a Group qualified GroupResource
+func Resource(resource string) schema.GroupResource {
+ return SchemeGroupVersion.WithResource(resource).GroupResource()
+}
+
+// addKnownTypes adds the list of known types to api.Scheme.
+func addKnownTypes(scheme *runtime.Scheme) error {
+ scheme.AddKnownTypes(SchemeGroupVersion,
+ &EgressService{},
+ &EgressServiceList{},
+ )
+ metav1.AddToGroupVersion(scheme, SchemeGroupVersion)
+ return nil
+}
diff --git a/go-controller/pkg/crd/egressservice/v1/types.go b/go-controller/pkg/crd/egressservice/v1/types.go
new file mode 100644
index 0000000000..4032f0ba08
--- /dev/null
+++ b/go-controller/pkg/crd/egressservice/v1/types.go
@@ -0,0 +1,70 @@
+/*
+Copyright 2022.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1
+
+import (
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// +genclient
+// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
+// +kubebuilder:resource:path=egressservices
+// +kubebuilder:singular=egressservice
+// +kubebuilder:object:root=true
+// +kubebuilder:subresource:status
+// EgressService is a CRD that allows the user to request that the source
+// IP of egress packets originating from all of the pods that are endpoints
+// of a given LoadBalancer Service would be its ingress IP.
+type EgressService struct {
+ metav1.TypeMeta `json:",inline"`
+ metav1.ObjectMeta `json:"metadata,omitempty"`
+
+ Spec EgressServiceSpec `json:"spec,omitempty"`
+ Status EgressServiceStatus `json:"status,omitempty"`
+}
+
+// EgressServiceSpec defines the desired state of EgressService
+type EgressServiceSpec struct {
+ // Allows limiting the nodes that can be selected to handle the service's traffic.
+ // When present, only a node whose labels match the specified selectors can be selected
+ // for handling the service's traffic.
+ // When it is not specified, any node in the cluster can be chosen to manage the service's traffic.
+ // +optional
+ NodeSelector metav1.LabelSelector `json:"nodeSelector,omitempty"`
+
+ // The network to which this service sends egress traffic and from which it receives the corresponding ingress replies.
+ // This is typically implemented as a VRF mapping, representing a numeric id or string name
+ // of a routing table; when omitted, the default host routing is used.
+ // +optional
+ Network string `json:"network,omitempty"`
+}
+
+// EgressServiceStatus defines the observed state of EgressService
+type EgressServiceStatus struct {
+ // The name of the node selected to handle the service's traffic.
+ Host string `json:"host"`
+}
+
+// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
+// +kubebuilder:resource:path=egressservices
+// +kubebuilder:singular=egressservice
+// EgressServiceList contains a list of EgressServices
+type EgressServiceList struct {
+ metav1.TypeMeta `json:",inline"`
+ metav1.ListMeta `json:"metadata,omitempty"`
+ Items []EgressService `json:"items"`
+}
diff --git a/go-controller/pkg/crd/egressservice/v1/zz_generated.deepcopy.go b/go-controller/pkg/crd/egressservice/v1/zz_generated.deepcopy.go
new file mode 100644
index 0000000000..7049af399c
--- /dev/null
+++ b/go-controller/pkg/crd/egressservice/v1/zz_generated.deepcopy.go
@@ -0,0 +1,119 @@
+//go:build !ignore_autogenerated
+// +build !ignore_autogenerated
+
+/*
+
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by deepcopy-gen. DO NOT EDIT.
+
+package v1
+
+import (
+ runtime "k8s.io/apimachinery/pkg/runtime"
+)
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *EgressService) DeepCopyInto(out *EgressService) {
+ *out = *in
+ out.TypeMeta = in.TypeMeta
+ in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
+ in.Spec.DeepCopyInto(&out.Spec)
+ out.Status = in.Status
+ return
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EgressService.
+func (in *EgressService) DeepCopy() *EgressService {
+ if in == nil {
+ return nil
+ }
+ out := new(EgressService)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *EgressService) DeepCopyObject() runtime.Object {
+ if c := in.DeepCopy(); c != nil {
+ return c
+ }
+ return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *EgressServiceList) DeepCopyInto(out *EgressServiceList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]EgressService, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EgressServiceList. +func (in *EgressServiceList) DeepCopy() *EgressServiceList { + if in == nil { + return nil + } + out := new(EgressServiceList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *EgressServiceList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *EgressServiceSpec) DeepCopyInto(out *EgressServiceSpec) { + *out = *in + in.NodeSelector.DeepCopyInto(&out.NodeSelector) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EgressServiceSpec. +func (in *EgressServiceSpec) DeepCopy() *EgressServiceSpec { + if in == nil { + return nil + } + out := new(EgressServiceSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *EgressServiceStatus) DeepCopyInto(out *EgressServiceStatus) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EgressServiceStatus. +func (in *EgressServiceStatus) DeepCopy() *EgressServiceStatus { + if in == nil { + return nil + } + out := new(EgressServiceStatus) + in.DeepCopyInto(out) + return out +} From 7d94e6095bdb15a8b29651b81b15de0eb44b5e5e Mon Sep 17 00:00:00 2001 From: Ori Braunshtein Date: Tue, 17 Jan 2023 09:30:39 +0200 Subject: [PATCH 28/90] Remove old egress-service annotations Signed-off-by: Ori Braunshtein --- go-controller/pkg/util/service_annotations.go | 69 ----- .../pkg/util/service_annotations_test.go | 275 ------------------ 2 files changed, 344 deletions(-) delete mode 100644 go-controller/pkg/util/service_annotations.go delete mode 100644 go-controller/pkg/util/service_annotations_test.go diff --git a/go-controller/pkg/util/service_annotations.go b/go-controller/pkg/util/service_annotations.go deleted file mode 100644 index 58820cc030..0000000000 --- a/go-controller/pkg/util/service_annotations.go +++ /dev/null @@ -1,69 +0,0 @@ -package util - -import ( - "encoding/json" - "fmt" - - kapi "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -const ( - EgressSVCAnnotation = "k8s.ovn.org/egress-service" - EgressSVCHostAnnotation = "k8s.ovn.org/egress-service-host" - EgressSVCLabelPrefix = "egress-service.k8s.ovn.org" -) - -type EgressSVCConfig struct { - NodeSelector metav1.LabelSelector `json:"nodeSelector,omitempty"` -} - -// ParseEgressSVCAnnotation returns the parsed egress-service annotation. 
-func ParseEgressSVCAnnotation(annotations map[string]string) (*EgressSVCConfig, error) { - anno, ok := annotations[EgressSVCAnnotation] - if !ok { - return nil, newAnnotationNotSetError("%s annotation not found", EgressSVCAnnotation) - } - - cfg := &EgressSVCConfig{} - if err := json.Unmarshal([]byte(anno), &cfg); err != nil { - return nil, fmt.Errorf("failed to unmarshal egress svc config annotation value %s: %v", anno, err) - } - - _, err := metav1.LabelSelectorAsSelector(&cfg.NodeSelector) - if err != nil { - return nil, fmt.Errorf("failed to parse the nodeSelector: %v", err) - } - - return cfg, nil -} - -// HasEgressSVCAnnotation returns true if the service has an egress-service -// config annotation. -func HasEgressSVCAnnotation(svc *kapi.Service) bool { - _, ok := svc.Annotations[EgressSVCAnnotation] - return ok -} - -// HasEgressSVCHostAnnotation returns true if the service has an egress-service-host -// annotation. -func HasEgressSVCHostAnnotation(svc *kapi.Service) bool { - _, ok := svc.Annotations[EgressSVCHostAnnotation] - return ok -} - -// GetEgressSVCHost returns the egress-service-host annotation value. -func GetEgressSVCHost(svc *kapi.Service) (string, error) { - host, ok := svc.Annotations[EgressSVCHostAnnotation] - if !ok { - return "", newAnnotationNotSetError("%s annotation not found for service %s/%s", EgressSVCHostAnnotation, svc.Namespace, svc.Name) - } - - return host, nil -} - -// EgressSVCHostChanged returns true if both services have the same -// egress-service-host annotation value. -func EgressSVCHostChanged(oldSVC, newSVC *kapi.Service) bool { - return oldSVC.Annotations[EgressSVCHostAnnotation] != newSVC.Annotations[EgressSVCHostAnnotation] -} diff --git a/go-controller/pkg/util/service_annotations_test.go b/go-controller/pkg/util/service_annotations_test.go deleted file mode 100644 index 6de72e3843..0000000000 --- a/go-controller/pkg/util/service_annotations_test.go +++ /dev/null @@ -1,275 +0,0 @@ -package util - -import ( - "fmt" - "testing" - - "github.com/stretchr/testify/assert" - kapi "k8s.io/api/core/v1" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -func TestParseEgressSVCAnnotation(t *testing.T) { - tests := []struct { - desc string - annotations map[string]string - errMatch error - }{ - { - desc: "valid empty annotation should work", - annotations: map[string]string{ - "k8s.ovn.org/egress-service": "{}", - }, - }, - { - desc: "valid annotation with matchLabels in nodeSelector should work", - annotations: map[string]string{ - "k8s.ovn.org/egress-service": "{\"nodeSelector\":{\"matchLabels\":{\"happy\": \"true\"}}}", - }, - }, - { - desc: "valid annotation with matchLabels in nodeSelector should work", - annotations: map[string]string{ - "k8s.ovn.org/egress-service": "{\"nodeSelector\":{\"matchExpressions\":[{\"key\": \"happy\",\"operator\": \"In\",\"values\":[\"true\"]}]}}", - }, - }, - { - desc: "missing annotation should fail", - annotations: nil, - errMatch: fmt.Errorf("k8s.ovn.org/egress-service annotation not found"), - }, - { - desc: "invalid annotation should fail", - annotations: map[string]string{ - "k8s.ovn.org/egress-service": "{&&}", - }, - errMatch: fmt.Errorf("failed to unmarshal egress svc config"), - }, - { - desc: "invalid matchLabels should fail", - annotations: map[string]string{ - "k8s.ovn.org/egress-service": "{\"nodeSelector\":{\"matchLabels\":{\"$hould\": \"F@il\"}}}", - }, - errMatch: fmt.Errorf("failed to parse the nodeSelector"), - }, - { - desc: "invalid matchExpressions should fail", - annotations: map[string]string{ - 
"k8s.ovn.org/egress-service": "{\"nodeSelector\":{\"matchExpressions\":[{\"key\": \"sad\",\"operator\": \"rainy\",\"values\":[\"true\"]}]}}", - }, - errMatch: fmt.Errorf("failed to parse the nodeSelector"), - }, - } - for i, tc := range tests { - t.Run(fmt.Sprintf("%d:%s", i, tc.desc), func(t *testing.T) { - res, e := ParseEgressSVCAnnotation(tc.annotations) - t.Log(res) - if tc.errMatch != nil { - assert.Contains(t, e.Error(), tc.errMatch.Error()) - } else { - assert.Nil(t, e) - assert.NotNil(t, res) - } - }) - } -} - -func TestHasEgressServiceAnnotation(t *testing.T) { - tests := []struct { - desc string - svc *kapi.Service - expected bool - }{ - { - desc: "a service with the annotation should be considered an egress service", - svc: &kapi.Service{ - ObjectMeta: v1.ObjectMeta{ - Annotations: map[string]string{ - "k8s.ovn.org/egress-service": "{}", - }, - }, - }, - expected: true, - }, - { - desc: "a service without the proper annotation should not be considered an egress service", - svc: &kapi.Service{ - ObjectMeta: v1.ObjectMeta{ - Annotations: map[string]string{ - "unrelated": "value", - }, - }, - }, - expected: false, - }, - { - desc: "a service without annotations should not be considered an egress service", - svc: &kapi.Service{}, - expected: false, - }, - } - for i, tc := range tests { - t.Run(fmt.Sprintf("%d:%s", i, tc.desc), func(t *testing.T) { - res := HasEgressSVCAnnotation(tc.svc) - t.Log(res) - assert.Equal(t, res, tc.expected) - }) - } -} - -func TestHasEgressSVCHostAnnotation(t *testing.T) { - tests := []struct { - desc string - svc *kapi.Service - expected bool - }{ - { - desc: "a service with the annotation should be considered as one that has a host", - svc: &kapi.Service{ - ObjectMeta: v1.ObjectMeta{ - Annotations: map[string]string{ - "k8s.ovn.org/egress-service-host": "dummy", - }, - }, - }, - expected: true, - }, - { - desc: "a service without the proper annotation should not be considered as having a host", - svc: &kapi.Service{ - ObjectMeta: v1.ObjectMeta{ - Annotations: map[string]string{ - "unrelated": "value", - }, - }, - }, - expected: false, - }, - { - desc: "a service without annotations should not be considered as having a host", - svc: &kapi.Service{}, - expected: false, - }, - } - for i, tc := range tests { - t.Run(fmt.Sprintf("%d:%s", i, tc.desc), func(t *testing.T) { - res := HasEgressSVCHostAnnotation(tc.svc) - t.Log(res) - assert.Equal(t, res, tc.expected) - }) - } -} - -func TestGetEgressSVCHost(t *testing.T) { - tests := []struct { - desc string - svc *kapi.Service - expected string - errMatch error - }{ - { - desc: "a service with the annotation should be considered as one that has a host", - svc: &kapi.Service{ - ObjectMeta: v1.ObjectMeta{ - Annotations: map[string]string{ - "k8s.ovn.org/egress-service-host": "dummy", - }, - }, - }, - expected: "dummy", - }, - { - desc: "a service without the proper annotation should not return a host", - svc: &kapi.Service{ - ObjectMeta: v1.ObjectMeta{ - Annotations: map[string]string{ - "unrelated": "value", - }, - }, - }, - errMatch: fmt.Errorf("annotation not found for service"), - }, - { - desc: "a service without annotations should not return a host", - svc: &kapi.Service{}, - errMatch: fmt.Errorf("annotation not found for service"), - }, - } - for i, tc := range tests { - t.Run(fmt.Sprintf("%d:%s", i, tc.desc), func(t *testing.T) { - res, e := GetEgressSVCHost(tc.svc) - t.Log(res) - if tc.errMatch != nil { - assert.Contains(t, e.Error(), tc.errMatch.Error()) - } else { - assert.Nil(t, e) - assert.NotNil(t, 
res) - } - }) - } -} - -func TestEgressSVCHostChanged(t *testing.T) { - tests := []struct { - desc string - svc1 *kapi.Service - svc2 *kapi.Service - expected bool - }{ - { - desc: "same host should be false", - svc1: &kapi.Service{ - ObjectMeta: v1.ObjectMeta{ - Annotations: map[string]string{ - "k8s.ovn.org/egress-service-host": "dummy", - }, - }, - }, - svc2: &kapi.Service{ - ObjectMeta: v1.ObjectMeta{ - Annotations: map[string]string{ - "k8s.ovn.org/egress-service-host": "dummy", - }, - }, - }, - expected: false, - }, - { - desc: "different host should be true", - svc1: &kapi.Service{ - ObjectMeta: v1.ObjectMeta{ - Annotations: map[string]string{ - "k8s.ovn.org/egress-service-host": "dummy", - }, - }, - }, - svc2: &kapi.Service{ - ObjectMeta: v1.ObjectMeta{ - Annotations: map[string]string{ - "k8s.ovn.org/egress-service-host": "dummy2", - }, - }, - }, - expected: true, - }, - { - desc: "host and empty should be considered a change", - svc1: &kapi.Service{ - ObjectMeta: v1.ObjectMeta{ - Annotations: map[string]string{ - "k8s.ovn.org/egress-service-host": "dummy", - }, - }, - }, - svc2: &kapi.Service{}, - expected: true, - }, - } - for i, tc := range tests { - t.Run(fmt.Sprintf("%d:%s", i, tc.desc), func(t *testing.T) { - res := EgressSVCHostChanged(tc.svc1, tc.svc2) - t.Log(res) - assert.Equal(t, res, tc.expected) - }) - } -} From bf04cc0549ff9e5292316463533b0640165e4a73 Mon Sep 17 00:00:00 2001 From: Ori Braunshtein Date: Tue, 17 Jan 2023 10:49:47 +0200 Subject: [PATCH 29/90] EgressService refactor: ovnkube-master Create a new controller for EgressService objects, reusing most of the existing logic Signed-off-by: Ori Braunshtein --- go-controller/pkg/config/config.go | 7 + go-controller/pkg/factory/factory.go | 86 ++- go-controller/pkg/factory/factory_test.go | 103 +++ go-controller/pkg/factory/handler.go | 3 + go-controller/pkg/kube/kube.go | 15 + go-controller/pkg/kube/mocks/Interface.go | 16 +- .../network_controller_manager.go | 1 + .../egress_services_controller.go | 571 +++++++++++++--- .../egress_services_endpointslice.go | 4 +- .../egress_services/egress_services_node.go | 14 +- .../egress_services_service.go | 363 ++-------- .../pkg/ovn/default_network_controller.go | 23 +- go-controller/pkg/ovn/egressservices_test.go | 644 ++++++++++++------ go-controller/pkg/ovn/hybrid_test.go | 9 + go-controller/pkg/ovn/master_test.go | 3 + go-controller/pkg/ovn/ovn.go | 16 +- go-controller/pkg/ovn/ovn_test.go | 7 + go-controller/pkg/util/kube.go | 12 +- 18 files changed, 1229 insertions(+), 668 deletions(-) diff --git a/go-controller/pkg/config/config.go b/go-controller/pkg/config/config.go index d64e4daeba..654df1f6ca 100644 --- a/go-controller/pkg/config/config.go +++ b/go-controller/pkg/config/config.go @@ -354,6 +354,7 @@ type OVNKubernetesFeatureConfig struct { EgressIPReachabiltyTotalTimeout int `gcfg:"egressip-reachability-total-timeout"` EnableEgressFirewall bool `gcfg:"enable-egress-firewall"` EnableEgressQoS bool `gcfg:"enable-egress-qos"` + EnableEgressService bool `gcfg:"enable-egress-service"` EgressIPNodeHealthCheckPort int `gcfg:"egressip-node-healthcheck-port"` EnableMultiNetwork bool `gcfg:"enable-multi-network"` EnableMultiNetworkPolicy bool `gcfg:"enable-multi-networkpolicy"` @@ -963,6 +964,12 @@ var OVNK8sFeatureFlags = []cli.Flag{ Destination: &cliConfig.OVNKubernetesFeature.EnableInterconnect, Value: OVNKubernetesFeature.EnableInterconnect, }, + &cli.BoolFlag{ + Name: "enable-egress-service", + Usage: "Configure to use EgressService CRD feature with ovn-kubernetes.", 
+ Destination: &cliConfig.OVNKubernetesFeature.EnableEgressService, + Value: OVNKubernetesFeature.EnableEgressService, + }, } // K8sFlags capture Kubernetes-related options diff --git a/go-controller/pkg/factory/factory.go b/go-controller/pkg/factory/factory.go index 1047071fe6..ac8b3b039c 100644 --- a/go-controller/pkg/factory/factory.go +++ b/go-controller/pkg/factory/factory.go @@ -35,6 +35,11 @@ import ( mnpinformerfactory "github.com/k8snetworkplumbingwg/multi-networkpolicy/pkg/client/informers/externalversions" mnplister "github.com/k8snetworkplumbingwg/multi-networkpolicy/pkg/client/listers/k8s.cni.cncf.io/v1beta1" + egressserviceapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1" + egressservicescheme "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/scheme" + egressserviceinformerfactory "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions" + egressserviceinformer "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/egressservice/v1" + kapi "k8s.io/api/core/v1" discovery "k8s.io/api/discovery/v1" knet "k8s.io/api/networking/v1" @@ -60,13 +65,14 @@ type WatchFactory struct { // requirements with atomic accesses handlerCounter uint64 - iFactory informerfactory.SharedInformerFactory - eipFactory egressipinformerfactory.SharedInformerFactory - efFactory egressfirewallinformerfactory.SharedInformerFactory - cpipcFactory ocpcloudnetworkinformerfactory.SharedInformerFactory - egressQoSFactory egressqosinformerfactory.SharedInformerFactory - mnpFactory mnpinformerfactory.SharedInformerFactory - informers map[reflect.Type]*informer + iFactory informerfactory.SharedInformerFactory + eipFactory egressipinformerfactory.SharedInformerFactory + efFactory egressfirewallinformerfactory.SharedInformerFactory + cpipcFactory ocpcloudnetworkinformerfactory.SharedInformerFactory + egressQoSFactory egressqosinformerfactory.SharedInformerFactory + mnpFactory mnpinformerfactory.SharedInformerFactory + egressServiceFactory egressserviceinformerfactory.SharedInformerFactory + informers map[reflect.Type]*informer stopChan chan struct{} } @@ -131,6 +137,7 @@ var ( EgressFwNodeType reflect.Type = reflect.TypeOf(&egressFwNode{}) CloudPrivateIPConfigType reflect.Type = reflect.TypeOf(&ocpcloudnetworkapi.CloudPrivateIPConfig{}) EgressQoSType reflect.Type = reflect.TypeOf(&egressqosapi.EgressQoS{}) + EgressServiceType reflect.Type = reflect.TypeOf(&egressserviceapi.EgressService{}) AddressSetNamespaceAndPodSelectorType reflect.Type = reflect.TypeOf(&addressSetNamespaceAndPodSelector{}) PeerNamespaceSelectorType reflect.Type = reflect.TypeOf(&peerNamespaceSelector{}) AddressSetPodSelectorType reflect.Type = reflect.TypeOf(&addressSetPodSelector{}) @@ -156,14 +163,15 @@ func NewMasterWatchFactory(ovnClientset *util.OVNMasterClientset) (*WatchFactory // the downside of making it tight (like 10 minutes) is needless spinning on all resources // However, AddEventHandlerWithResyncPeriod can specify a per handler resync period wf := &WatchFactory{ - iFactory: informerfactory.NewSharedInformerFactory(ovnClientset.KubeClient, resyncInterval), - eipFactory: egressipinformerfactory.NewSharedInformerFactory(ovnClientset.EgressIPClient, resyncInterval), - efFactory: egressfirewallinformerfactory.NewSharedInformerFactory(ovnClientset.EgressFirewallClient, resyncInterval), - cpipcFactory: 
ocpcloudnetworkinformerfactory.NewSharedInformerFactory(ovnClientset.CloudNetworkClient, resyncInterval), - egressQoSFactory: egressqosinformerfactory.NewSharedInformerFactory(ovnClientset.EgressQoSClient, resyncInterval), - mnpFactory: mnpinformerfactory.NewSharedInformerFactory(ovnClientset.MultiNetworkPolicyClient, resyncInterval), - informers: make(map[reflect.Type]*informer), - stopChan: make(chan struct{}), + iFactory: informerfactory.NewSharedInformerFactory(ovnClientset.KubeClient, resyncInterval), + eipFactory: egressipinformerfactory.NewSharedInformerFactory(ovnClientset.EgressIPClient, resyncInterval), + efFactory: egressfirewallinformerfactory.NewSharedInformerFactory(ovnClientset.EgressFirewallClient, resyncInterval), + cpipcFactory: ocpcloudnetworkinformerfactory.NewSharedInformerFactory(ovnClientset.CloudNetworkClient, resyncInterval), + egressQoSFactory: egressqosinformerfactory.NewSharedInformerFactory(ovnClientset.EgressQoSClient, resyncInterval), + mnpFactory: mnpinformerfactory.NewSharedInformerFactory(ovnClientset.MultiNetworkPolicyClient, resyncInterval), + egressServiceFactory: egressserviceinformerfactory.NewSharedInformerFactory(ovnClientset.EgressServiceClient, resyncInterval), + informers: make(map[reflect.Type]*informer), + stopChan: make(chan struct{}), } if err := egressipapi.AddToScheme(egressipscheme.Scheme); err != nil { @@ -175,6 +183,9 @@ func NewMasterWatchFactory(ovnClientset *util.OVNMasterClientset) (*WatchFactory if err := egressqosapi.AddToScheme(egressqosscheme.Scheme); err != nil { return nil, err } + if err := egressserviceapi.AddToScheme(egressservicescheme.Scheme); err != nil { + return nil, err + } if err := nadapi.AddToScheme(nadscheme.Scheme); err != nil { return nil, err @@ -257,6 +268,12 @@ func NewMasterWatchFactory(ovnClientset *util.OVNMasterClientset) (*WatchFactory return nil, err } } + if config.OVNKubernetesFeature.EnableEgressService { + wf.informers[EgressServiceType], err = newInformer(EgressServiceType, wf.egressServiceFactory.K8s().V1().EgressServices().Informer()) + if err != nil { + return nil, err + } + } if util.IsMultiNetworkPoliciesSupportEnabled() { wf.informers[MultiNetworkPolicyType], err = newInformer(MultiNetworkPolicyType, wf.mnpFactory.K8sCniCncfIo().V1beta1().MultiNetworkPolicies().Informer()) @@ -318,6 +335,15 @@ func (wf *WatchFactory) Start() error { } } + if config.OVNKubernetesFeature.EnableEgressService && wf.egressServiceFactory != nil { + wf.egressServiceFactory.Start(wf.stopChan) + for oType, synced := range wf.egressServiceFactory.WaitForCacheSync(wf.stopChan) { + if !synced { + return fmt.Errorf("error in syncing cache for %v informer", oType) + } + } + } + return nil } @@ -325,9 +351,14 @@ func (wf *WatchFactory) Start() error { // informers to save memory + bandwidth. It is to be used by the node-only process. 
func NewNodeWatchFactory(ovnClientset *util.OVNNodeClientset, nodeName string) (*WatchFactory, error) { wf := &WatchFactory{ - iFactory: informerfactory.NewSharedInformerFactory(ovnClientset.KubeClient, resyncInterval), - informers: make(map[reflect.Type]*informer), - stopChan: make(chan struct{}), + iFactory: informerfactory.NewSharedInformerFactory(ovnClientset.KubeClient, resyncInterval), + egressServiceFactory: egressserviceinformerfactory.NewSharedInformerFactory(ovnClientset.EgressServiceClient, resyncInterval), + informers: make(map[reflect.Type]*informer), + stopChan: make(chan struct{}), + } + + if err := egressserviceapi.AddToScheme(egressservicescheme.Scheme); err != nil { + return nil, err } // For Services and Endpoints, pre-populate the shared Informer with one that @@ -398,6 +429,13 @@ func NewNodeWatchFactory(ovnClientset *util.OVNNodeClientset, nodeName string) ( return nil, err } + if config.OVNKubernetesFeature.EnableEgressService { + wf.informers[EgressServiceType], err = newInformer(EgressServiceType, wf.egressServiceFactory.K8s().V1().EgressServices().Informer()) + if err != nil { + return nil, err + } + } + return wf, nil } @@ -710,6 +748,10 @@ func (wf *WatchFactory) RemoveEgressQoSHandler(handler *Handler) { wf.removeHandler(EgressQoSType, handler) } +func (wf *WatchFactory) RemoveEgressServiceHandler(handler *Handler) { + wf.removeHandler(EgressServiceType, handler) +} + // AddNetworkAttachmentDefinitionHandler adds a handler function that will be executed on NetworkAttachmentDefinition object changes func (wf *WatchFactory) AddNetworkAttachmentDefinitionHandler(handlerFuncs cache.ResourceEventHandler, processExisting func([]interface{}) error) (*Handler, error) { return wf.addHandler(NetworkAttachmentDefinitionType, "", nil, handlerFuncs, processExisting, defaultHandlerPriority) @@ -939,10 +981,18 @@ func (wf *WatchFactory) ServiceInformer() cache.SharedIndexInformer { return wf.informers[ServiceType].inf } +func (wf *WatchFactory) EndpointSliceInformer() cache.SharedIndexInformer { + return wf.informers[EndpointSliceType].inf +} + func (wf *WatchFactory) EgressQoSInformer() egressqosinformer.EgressQoSInformer { return wf.egressQoSFactory.K8s().V1().EgressQoSes() } +func (wf *WatchFactory) EgressServiceInformer() egressserviceinformer.EgressServiceInformer { + return wf.egressServiceFactory.K8s().V1().EgressServices() +} + // withServiceNameAndNoHeadlessServiceSelector returns a LabelSelector (added to the // watcher for EndpointSlices) that will only choose EndpointSlices with a non-empty // "kubernetes.io/service-name" label and without "service.kubernetes.io/headless" diff --git a/go-controller/pkg/factory/factory_test.go b/go-controller/pkg/factory/factory_test.go index 10a054017e..43c91bb58a 100644 --- a/go-controller/pkg/factory/factory_test.go +++ b/go-controller/pkg/factory/factory_test.go @@ -31,6 +31,9 @@ import ( egressqos "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressqos/v1" egressqosfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressqos/v1/apis/clientset/versioned/fake" + egressservice "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1" + egressservicefake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/fake" + ocpcloudnetworkapi "github.com/openshift/api/cloudnetwork/v1" ocpconfigapi "github.com/openshift/api/config/v1" ocpcloudnetworkclientsetfake "github.com/openshift/client-go/cloudnetwork/clientset/versioned/fake" @@ -174,6 +177,19 @@ func 
newEgressQoS(name, namespace string) *egressqos.EgressQoS { } } +func newEgressService(name, namespace string) *egressservice.EgressService { + return &egressservice.EgressService{ + ObjectMeta: newObjectMeta(name, namespace), + Spec: egressservice.EgressServiceSpec{ + NodeSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "kubernetes.io/hostname": "node", + }, + }, + }, + } +} + func objSetup(c *fake.Clientset, objType string, listFn func(core.Action) (bool, runtime.Object, error)) *watch.FakeWatcher { w := watch.NewFake() c.AddWatchReactor(objType, core.DefaultWatchReactor(w, nil)) @@ -209,6 +225,13 @@ func egressQoSObjSetup(c *egressqosfake.Clientset, objType string, listFn func(c return w } +func egressServiceObjSetup(c *egressservicefake.Clientset, objType string, listFn func(core.Action) (bool, runtime.Object, error)) *watch.FakeWatcher { + w := watch.NewFake() + c.AddWatchReactor(objType, core.DefaultWatchReactor(w, nil)) + c.AddReactor("list", objType, listFn) + return w +} + type handlerCalls struct { added int32 updated int32 @@ -235,6 +258,7 @@ var _ = Describe("Watch Factory Operations", func() { egressFirewallFakeClient *egressfirewallfake.Clientset cloudNetworkFakeClient *ocpcloudnetworkclientsetfake.Clientset egressQoSFakeClient *egressqosfake.Clientset + egressServiceFakeClient *egressservicefake.Clientset podWatch, namespaceWatch, nodeWatch *watch.FakeWatcher policyWatch, serviceWatch *watch.FakeWatcher endpointSliceWatch *watch.FakeWatcher @@ -242,6 +266,7 @@ var _ = Describe("Watch Factory Operations", func() { egressIPWatch *watch.FakeWatcher cloudPrivateIPConfigWatch *watch.FakeWatcher egressQoSWatch *watch.FakeWatcher + egressServiceWatch *watch.FakeWatcher pods []*v1.Pod namespaces []*v1.Namespace nodes []*v1.Node @@ -253,6 +278,7 @@ var _ = Describe("Watch Factory Operations", func() { wf *WatchFactory egressFirewalls []*egressfirewall.EgressFirewall egressQoSes []*egressqos.EgressQoS + egressServices []*egressservice.EgressService err error ) @@ -267,6 +293,7 @@ var _ = Describe("Watch Factory Operations", func() { config.OVNKubernetesFeature.EnableEgressIP = true config.OVNKubernetesFeature.EnableEgressFirewall = true config.OVNKubernetesFeature.EnableEgressQoS = true + config.OVNKubernetesFeature.EnableEgressService = true config.Kubernetes.PlatformType = string(ocpconfigapi.AWSPlatformType) fakeClient = &fake.Clientset{} @@ -274,6 +301,7 @@ var _ = Describe("Watch Factory Operations", func() { egressIPFakeClient = &egressipfake.Clientset{} cloudNetworkFakeClient = &ocpcloudnetworkclientsetfake.Clientset{} egressQoSFakeClient = &egressqosfake.Clientset{} + egressServiceFakeClient = &egressservicefake.Clientset{} ovnClientset = &util.OVNMasterClientset{ KubeClient: fakeClient, @@ -281,6 +309,7 @@ var _ = Describe("Watch Factory Operations", func() { EgressFirewallClient: egressFirewallFakeClient, CloudNetworkClient: cloudNetworkFakeClient, EgressQoSClient: egressQoSFakeClient, + EgressServiceClient: egressServiceFakeClient, } pods = make([]*v1.Pod, 0) @@ -372,6 +401,15 @@ var _ = Describe("Watch Factory Operations", func() { } return true, obj, nil }) + + egressServices = make([]*egressservice.EgressService, 0) + egressServiceWatch = egressServiceObjSetup(egressServiceFakeClient, "egressservices", func(core.Action) (bool, runtime.Object, error) { + obj := &egressservice.EgressServiceList{} + for _, p := range egressServices { + obj.Items = append(obj.Items, *p) + } + return true, obj, nil + }) }) AfterEach(func() { @@ -509,6 +547,10 @@ var _ = 
Describe("Watch Factory Operations", func() { egressQoSes = append(egressQoSes, newEgressQoS("myEgressQoS", "default")) testExisting(EgressQoSType, "", nil, defaultHandlerPriority) }) + It("is called for each existing egressService", func() { + egressServices = append(egressServices, newEgressService("myEgressService", "default")) + testExisting(EgressServiceType, "", nil, defaultHandlerPriority) + }) It("is called for each existing pod that matches a given namespace and label", func() { pod := newPod("pod1", "default") @@ -606,6 +648,11 @@ var _ = Describe("Watch Factory Operations", func() { egressQoSes = append(egressQoSes, newEgressQoS("myEgressQoS1", "default")) testExisting(EgressQoSType) }) + It("calls ADD for each existing egressService", func() { + egressServices = append(egressServices, newEgressService("myEgressService", "default")) + egressServices = append(egressServices, newEgressService("myEgressService1", "default")) + testExisting(EgressServiceType) + }) }) Context("when EgressIP is disabled", func() { @@ -647,6 +694,19 @@ var _ = Describe("Watch Factory Operations", func() { testExisting(EgressQoSType) }) }) + Context("when EgressService is disabled", func() { + testExisting := func(objType reflect.Type) { + wf, err = NewMasterWatchFactory(ovnClientset) + Expect(err).NotTo(HaveOccurred()) + err = wf.Start() + Expect(err).NotTo(HaveOccurred()) + Expect(wf.informers).NotTo(HaveKey(objType)) + } + It("does not contain EgressService informer", func() { + config.OVNKubernetesFeature.EnableEgressService = false + testExisting(EgressServiceType) + }) + }) addFilteredHandler := func(wf *WatchFactory, objType reflect.Type, realObjType reflect.Type, namespace string, sel labels.Selector, funcs cache.ResourceEventHandlerFuncs) (*Handler, *handlerCalls) { calls := handlerCalls{} @@ -1657,6 +1717,49 @@ var _ = Describe("Watch Factory Operations", func() { wf.RemoveEgressQoSHandler(h) }) + It("responds to egressService add/update/delete events", func() { + wf, err = NewMasterWatchFactory(ovnClientset) + Expect(err).NotTo(HaveOccurred()) + err = wf.Start() + Expect(err).NotTo(HaveOccurred()) + + added := newEgressService("myEgressService", "default") + h, c := addHandler(wf, EgressServiceType, cache.ResourceEventHandlerFuncs{ + AddFunc: func(obj interface{}) { + egressService := obj.(*egressservice.EgressService) + Expect(reflect.DeepEqual(egressService, added)).To(BeTrue()) + }, + UpdateFunc: func(old, new interface{}) { + newEgressService := new.(*egressservice.EgressService) + Expect(reflect.DeepEqual(newEgressService, added)).To(BeTrue()) + Expect(newEgressService.Spec.NodeSelector).To(Equal(metav1.LabelSelector{ + MatchLabels: map[string]string{ + "kubernetes.io/hostname": "node2", + }, + })) + }, + DeleteFunc: func(obj interface{}) { + egressService := obj.(*egressservice.EgressService) + Expect(reflect.DeepEqual(egressService, added)).To(BeTrue()) + }, + }) + + egressServices = append(egressServices, added) + egressServiceWatch.Add(added) + Eventually(c.getAdded, 2).Should(Equal(1)) + added.Spec.NodeSelector = metav1.LabelSelector{ + MatchLabels: map[string]string{ + "kubernetes.io/hostname": "node2", + }, + } + egressServiceWatch.Modify(added) + Eventually(c.getUpdated, 2).Should(Equal(1)) + egressServices = egressServices[:0] + egressServiceWatch.Delete(added) + Eventually(c.getDeleted, 2).Should(Equal(1)) + + wf.RemoveEgressServiceHandler(h) + }) It("stops processing events after the handler is removed", func() { wf, err = NewMasterWatchFactory(ovnClientset) 
Expect(err).NotTo(HaveOccurred()) diff --git a/go-controller/pkg/factory/handler.go b/go-controller/pkg/factory/handler.go index 22f787ce69..45b1b6e087 100644 --- a/go-controller/pkg/factory/handler.go +++ b/go-controller/pkg/factory/handler.go @@ -15,6 +15,7 @@ import ( egressfirewalllister "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1/apis/listers/egressfirewall/v1" egressqoslister "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressqos/v1/apis/listers/egressqos/v1" + egressservicelister "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/listers/egressservice/v1" cloudprivateipconfiglister "github.com/openshift/client-go/cloudnetwork/listers/cloudnetwork/v1" egressiplister "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1/apis/listers/egressip/v1" @@ -513,6 +514,8 @@ func newInformerLister(oType reflect.Type, sharedInformer cache.SharedIndexInfor return networkattachmentdefinitionlister.NewNetworkAttachmentDefinitionLister(sharedInformer.GetIndexer()), nil case MultiNetworkPolicyType: return multinetworkpolicylister.NewMultiNetworkPolicyLister(sharedInformer.GetIndexer()), nil + case EgressServiceType: + return egressservicelister.NewEgressServiceLister(sharedInformer.GetIndexer()), nil } return nil, fmt.Errorf("cannot create lister from type %v", oType) diff --git a/go-controller/pkg/kube/kube.go b/go-controller/pkg/kube/kube.go index 7e7d8dbef2..c790df4b9b 100644 --- a/go-controller/pkg/kube/kube.go +++ b/go-controller/pkg/kube/kube.go @@ -10,6 +10,7 @@ import ( egressfirewallclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1/apis/clientset/versioned" egressipv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1" egressipclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1/apis/clientset/versioned" + egressserviceclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned" kapi "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" @@ -33,6 +34,7 @@ type InterfaceOVN interface { CreateCloudPrivateIPConfig(cloudPrivateIPConfig *ocpcloudnetworkapi.CloudPrivateIPConfig) (*ocpcloudnetworkapi.CloudPrivateIPConfig, error) UpdateCloudPrivateIPConfig(cloudPrivateIPConfig *ocpcloudnetworkapi.CloudPrivateIPConfig) (*ocpcloudnetworkapi.CloudPrivateIPConfig, error) DeleteCloudPrivateIPConfig(name string) error + UpdateEgressServiceStatus(namespace, name, host string) error } // Interface represents the exported methods for dealing with getting/setting @@ -70,6 +72,7 @@ type KubeOVN struct { EIPClient egressipclientset.Interface EgressFirewallClient egressfirewallclientset.Interface CloudNetworkClient ocpcloudnetworkclientset.Interface + EgressServiceClient egressserviceclientset.Interface } // SetAnnotationsOnPod takes the pod object and map of key/value string pairs to set as annotations @@ -387,3 +390,15 @@ func (k *KubeOVN) UpdateCloudPrivateIPConfig(cloudPrivateIPConfig *ocpcloudnetwo func (k *KubeOVN) DeleteCloudPrivateIPConfig(name string) error { return k.CloudNetworkClient.CloudV1().CloudPrivateIPConfigs().Delete(context.TODO(), name, metav1.DeleteOptions{}) } + +func (k *KubeOVN) UpdateEgressServiceStatus(namespace, name, host string) error { + es, err := k.EgressServiceClient.K8sV1().EgressServices(namespace).Get(context.TODO(), name, metav1.GetOptions{}) + if err != nil { + return err + } + + es.Status.Host = host + + _, err = 
k.EgressServiceClient.K8sV1().EgressServices(es.Namespace).UpdateStatus(context.TODO(), es, metav1.UpdateOptions{}) + return err +} diff --git a/go-controller/pkg/kube/mocks/Interface.go b/go-controller/pkg/kube/mocks/Interface.go index 0c23b2bf63..f54b087e2b 100644 --- a/go-controller/pkg/kube/mocks/Interface.go +++ b/go-controller/pkg/kube/mocks/Interface.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.14.0. DO NOT EDIT. +// Code generated by mockery v2.15.0. DO NOT EDIT. package mocks @@ -283,6 +283,20 @@ func (_m *Interface) UpdateNode(node *corev1.Node) error { return r0 } +// UpdateEgressServiceStatus provides a mock function with given fields: namespace, name, host +func (_m *Interface) UpdateEgressServiceStatus(namespace string, name string, host string) error { + ret := _m.Called(namespace, name, host) + + var r0 error + if rf, ok := ret.Get(0).(func(string, string, string) error); ok { + r0 = rf(namespace, name, host) + } else { + r0 = ret.Error(0) + } + + return r0 +} + // UpdateNodeStatus provides a mock function with given fields: node func (_m *Interface) UpdateNodeStatus(node *corev1.Node) error { ret := _m.Called(node) diff --git a/go-controller/pkg/network-controller-manager/network_controller_manager.go b/go-controller/pkg/network-controller-manager/network_controller_manager.go index 15388585be..b87f348bf4 100644 --- a/go-controller/pkg/network-controller-manager/network_controller_manager.go +++ b/go-controller/pkg/network-controller-manager/network_controller_manager.go @@ -193,6 +193,7 @@ func NewNetworkControllerManager(ovnClient *util.OVNClientset, identity string, EIPClient: ovnClient.EgressIPClient, EgressFirewallClient: ovnClient.EgressFirewallClient, CloudNetworkClient: ovnClient.CloudNetworkClient, + EgressServiceClient: ovnClient.EgressServiceClient, }, stopChan: make(chan struct{}), watchFactory: wf, diff --git a/go-controller/pkg/ovn/controller/egress_services/egress_services_controller.go b/go-controller/pkg/ovn/controller/egress_services/egress_services_controller.go index a2672aafe1..e531ec4ab9 100644 --- a/go-controller/pkg/ovn/controller/egress_services/egress_services_controller.go +++ b/go-controller/pkg/ovn/controller/egress_services/egress_services_controller.go @@ -8,6 +8,10 @@ import ( "time" libovsdbclient "github.com/ovn-org/libovsdb/client" + libovsdb "github.com/ovn-org/libovsdb/ovsdb" + egressserviceapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1" + egressserviceinformer "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/egressservice/v1" + egressservicelisters "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/listers/egressservice/v1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdbops" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" @@ -17,8 +21,10 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/util/errors" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" @@ -34,8 +40,9 @@ import ( ) const ( - maxRetries = 10 - svcExternalIDKey = "EgressSVC" // key set on lrps to identify to which egress service it belongs + maxRetries = 10 + svcExternalIDKey = "EgressSVC" // key 
set on lrps to identify to which egress service it belongs + egressSVCLabelPrefix = "egress-service.k8s.ovn.org" ) type InitClusterEgressPoliciesFunc func(client libovsdbclient.Client, addressSetFactory addressset.AddressSetFactory, @@ -58,6 +65,7 @@ type Controller struct { deleteNoRerouteNodePolicies DeleteNoRerouteNodePoliciesFunc deleteLegacyDefaultNoRerouteNodePolicies DeleteLegacyDefaultNoRerouteNodePoliciesFunc IsReachable func(nodeName string, mgmtIPs []net.IP, healthClient healthcheck.EgressIPHealthClient) bool // TODO: make a universal cache instead + setEgressServiceStatus func(ns, name, host string) error services map[string]*svcState // svc key -> state nodes map[string]*nodeState // node name -> state @@ -65,11 +73,16 @@ type Controller struct { // A map of the services we attempted to allocate but could not. // When a node is updated we check this map to see if a service can // be allocated on it - if it does we queue the service again. - unallocatedServices map[string]labels.Selector + // We also check this cache when an ep is added, as the service might + // got to this cache by having no eps. + unallocatedServices map[string]labels.Selector // svc key -> its node selector + + egressServiceLister egressservicelisters.EgressServiceLister + egressServiceSynced cache.InformerSynced + egressServiceQueue workqueue.RateLimitingInterface serviceLister corelisters.ServiceLister servicesSynced cache.InformerSynced - servicesQueue workqueue.RateLimitingInterface endpointSliceLister discoverylisters.EndpointSliceLister endpointSlicesSynced cache.InformerSynced @@ -113,12 +126,15 @@ func NewController( createNoRerouteNodePolicies CreateNoRerouteNodePoliciesFunc, deleteNoRerouteNodePolicies DeleteNoRerouteNodePoliciesFunc, deleteLegacyDefaultNoRerouteNodePolicies DeleteLegacyDefaultNoRerouteNodePoliciesFunc, + setEgressServiceStatus func(ns, name, host string) error, isReachable func(nodeName string, mgmtIPs []net.IP, healthClient healthcheck.EgressIPHealthClient) bool, stopCh <-chan struct{}, + esInformer egressserviceinformer.EgressServiceInformer, serviceInformer coreinformers.ServiceInformer, endpointSliceInformer discoveryinformers.EndpointSliceInformer, nodeInformer coreinformers.NodeInformer) (*Controller, error) { klog.Info("Setting up event handlers for Egress Services") + c := &Controller{ controllerName: controllerName, client: client, @@ -129,19 +145,31 @@ func NewController( deleteNoRerouteNodePolicies: deleteNoRerouteNodePolicies, deleteLegacyDefaultNoRerouteNodePolicies: deleteLegacyDefaultNoRerouteNodePolicies, IsReachable: isReachable, + setEgressServiceStatus: setEgressServiceStatus, stopCh: stopCh, services: map[string]*svcState{}, nodes: map[string]*nodeState{}, unallocatedServices: map[string]labels.Selector{}, } - c.serviceLister = serviceInformer.Lister() - c.servicesSynced = serviceInformer.Informer().HasSynced - c.servicesQueue = workqueue.NewNamedRateLimitingQueue( + c.egressServiceLister = esInformer.Lister() + c.egressServiceSynced = esInformer.Informer().HasSynced + c.egressServiceQueue = workqueue.NewNamedRateLimitingQueue( workqueue.NewItemFastSlowRateLimiter(1*time.Second, 5*time.Second, 5), "egressservices", ) - _, err := serviceInformer.Informer().AddEventHandler(factory.WithUpdateHandlingForObjReplace(cache.ResourceEventHandlerFuncs{ + _, err := esInformer.Informer().AddEventHandler(factory.WithUpdateHandlingForObjReplace(cache.ResourceEventHandlerFuncs{ + AddFunc: c.onEgressServiceAdd, + UpdateFunc: c.onEgressServiceUpdate, + DeleteFunc: 
c.onEgressServiceDelete, + })) + if err != nil { + return nil, err + } + + c.serviceLister = serviceInformer.Lister() + c.servicesSynced = serviceInformer.Informer().HasSynced + _, err = serviceInformer.Informer().AddEventHandler(factory.WithUpdateHandlingForObjReplace(cache.ResourceEventHandlerFuncs{ AddFunc: c.onServiceAdd, UpdateFunc: c.onServiceUpdate, DeleteFunc: c.onServiceDelete, @@ -184,19 +212,25 @@ func (c *Controller) Run(threadiness int) { klog.Infof("Starting Egress Services Controller") - if !cache.WaitForNamedCacheSync("egressservices", c.stopCh, c.servicesSynced) { + if !cache.WaitForNamedCacheSync("egressservices", c.stopCh, c.egressServiceSynced) { utilruntime.HandleError(fmt.Errorf("timed out waiting for caches to sync")) klog.Infof("Synchronization failed") return } - if !cache.WaitForNamedCacheSync("egressserviceendpointslices", c.stopCh, c.endpointSlicesSynced) { + if !cache.WaitForNamedCacheSync("egressservices_services", c.stopCh, c.servicesSynced) { utilruntime.HandleError(fmt.Errorf("timed out waiting for caches to sync")) klog.Infof("Synchronization failed") return } - if !cache.WaitForNamedCacheSync("egressservicenodes", c.stopCh, c.nodesSynced) { + if !cache.WaitForNamedCacheSync("egressservices_endpointslices", c.stopCh, c.endpointSlicesSynced) { + utilruntime.HandleError(fmt.Errorf("timed out waiting for caches to sync")) + klog.Infof("Synchronization failed") + return + } + + if !cache.WaitForNamedCacheSync("egressservices_nodes", c.stopCh, c.nodesSynced) { utilruntime.HandleError(fmt.Errorf("timed out waiting for caches to sync")) klog.Infof("Synchronization failed") return @@ -219,7 +253,7 @@ func (c *Controller) Run(threadiness int) { go func() { defer wg.Done() wait.Until(func() { - c.runServiceWorker(wg) + c.runEgressServiceWorker(wg) }, time.Second, c.stopCh) }() } @@ -240,7 +274,7 @@ func (c *Controller) Run(threadiness int) { <-c.stopCh klog.Infof("Shutting down Egress Services controller") - c.servicesQueue.ShutDown() + c.egressServiceQueue.ShutDown() c.nodesQueue.ShutDown() wg.Wait() @@ -263,84 +297,107 @@ func (c *Controller) repair() error { svcKeyToConfiguredV4Endpoints := map[string][]string{} svcKeyToConfiguredV6Endpoints := map[string][]string{} - services, _ := c.serviceLister.List(labels.Everything()) + services, err := c.serviceLister.List(labels.Everything()) + if err != nil { + return err + } + allServices := map[string]*corev1.Service{} + for _, s := range services { + key, err := cache.MetaNamespaceKeyFunc(s) + if err != nil { + klog.Errorf("Failed to read Service key: %v", err) + continue + } + allServices[key] = s + } - for _, svc := range services { - if util.HasEgressSVCAnnotation(svc) && util.HasEgressSVCHostAnnotation(svc) && - util.ServiceTypeHasLoadBalancer(svc) && len(svc.Status.LoadBalancer.Ingress) > 0 { - var err error - key, _ := cache.MetaNamespaceKeyFunc(svc) - conf, err := util.ParseEgressSVCAnnotation(svc.Annotations) - if err != nil && !util.IsAnnotationNotSetError(err) { - klog.Errorf("can't parse %s egress service configuration, err: %v", key, err) - continue - } + egressServices, err := c.egressServiceLister.List(labels.Everything()) + if err != nil { + return err + } + for _, es := range egressServices { + key, err := cache.MetaNamespaceKeyFunc(es) + if err != nil { + klog.Errorf("Failed to read EgressService key: %v", err) + continue + } + svc := allServices[key] + if svc == nil { + continue + } - nodeSelector := &conf.NodeSelector - svcHost, _ := util.GetEgressSVCHost(svc) + if 
!util.ServiceTypeHasLoadBalancer(svc) || len(svc.Status.LoadBalancer.Ingress) == 0 { + continue + } - node, err := c.nodeLister.Get(svcHost) - if err != nil { - klog.Errorf("Node %s could not be retrieved from lister, err: %v", svcHost, err) - continue - } - if !nodeIsReady(node) { - klog.Errorf("Node %s is not ready, it can not be used for egress service %s", svcHost, key) - continue - } + nodeSelector := &es.Spec.NodeSelector + svcHost := es.Status.Host - v4, v6, epsNodes, err := c.allEndpointsFor(svc) - if err != nil { - klog.Errorf("Can't fetch all endpoints for egress service %s, err: %v", key, err) - continue - } + if svcHost == "" { + continue + } - totalEps := len(v4) + len(v6) - if totalEps == 0 { - klog.Errorf("Egress service %s has no endpoints", key) - continue - } + node, err := c.nodeLister.Get(svcHost) + if err != nil { + klog.Errorf("Node %s could not be retrieved from lister, err: %v", svcHost, err) + continue + } + if !nodeIsReady(node) { + klog.Infof("Node %s is not ready, it can not be used for egress service %s", svcHost, key) + continue + } - if len(epsNodes) != 0 && svc.Spec.ExternalTrafficPolicy == corev1.ServiceExternalTrafficPolicyTypeLocal { - // If the service is ETP=Local only a node with local eps can be used. - // We want to verify that the current selected node has a local ep. - matchEpsNodes := metav1.LabelSelectorRequirement{ - Key: "kubernetes.io/hostname", - Operator: metav1.LabelSelectorOpIn, - Values: epsNodes, - } - nodeSelector.MatchExpressions = append(nodeSelector.MatchExpressions, matchEpsNodes) - } + v4, v6, epsNodes, err := c.allEndpointsFor(svc) + if err != nil { + klog.Errorf("Can't fetch all endpoints for egress service %s, err: %v", key, err) + continue + } - selector, err := metav1.LabelSelectorAsSelector(nodeSelector) - if err != nil { - klog.Errorf("Selector is invalid, err: %v", err) - continue - } + totalEps := len(v4) + len(v6) + if totalEps == 0 { + klog.Infof("Egress service %s has no endpoints", key) + continue + } - if !selector.Matches(labels.Set(node.Labels)) { - klog.Errorf("Node %s does no longer match service %s selectors %s", svcHost, key, selector.String()) - continue + if len(epsNodes) != 0 && svc.Spec.ExternalTrafficPolicy == corev1.ServiceExternalTrafficPolicyTypeLocal { + // If the service is ETP=Local only a node with local eps can be used. + // We want to verify that the current selected node has a local ep. 
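// Illustrative sketch, not part of the patch: the ETP=Local narrowing described in the
// comment above appends a "kubernetes.io/hostname In <nodes-with-local-endpoints>"
// requirement before converting the EgressService node selector into a labels.Selector.
// The selector contents and node labels below are made-up inputs.
package main

import (
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
)

func main() {
	nodeSelector := &metav1.LabelSelector{
		MatchLabels: map[string]string{"example.com/rack": "r1"},
	}
	epsNodes := []string{"node2", "node3"} // nodes that carry a local endpoint

	nodeSelector.MatchExpressions = append(nodeSelector.MatchExpressions, metav1.LabelSelectorRequirement{
		Key:      "kubernetes.io/hostname",
		Operator: metav1.LabelSelectorOpIn,
		Values:   epsNodes,
	})

	selector, err := metav1.LabelSelectorAsSelector(nodeSelector)
	if err != nil {
		fmt.Println("invalid selector:", err)
		return
	}
	nodeLabels := labels.Set{"example.com/rack": "r1", "kubernetes.io/hostname": "node2"}
	fmt.Println(selector.Matches(nodeLabels)) // true: matches the selector and hosts a local endpoint
}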
+ matchEpsNodes := metav1.LabelSelectorRequirement{ + Key: "kubernetes.io/hostname", + Operator: metav1.LabelSelectorOpIn, + Values: epsNodes, } + nodeSelector.MatchExpressions = append(nodeSelector.MatchExpressions, matchEpsNodes) + } - nodeState, ok := c.nodes[svcHost] - if !ok { - nodeState, err = c.nodeStateFor(svcHost) - if err != nil { - klog.Errorf("Can't fetch egress service %s node %s state, err: %v", key, svcHost, err) - continue - } - } + selector, err := metav1.LabelSelectorAsSelector(nodeSelector) + if err != nil { + klog.Errorf("Selector %s is invalid for EgressService %s, err: %v", selector.String(), key, err) + continue + } + + if !selector.Matches(labels.Set(node.Labels)) { + klog.Infof("Node %s does no longer match service %s selectors %s", svcHost, key, selector.String()) + continue + } - svcKeyToAllV4Endpoints[key] = v4 - svcKeyToAllV6Endpoints[key] = v6 - svcKeyToConfiguredV4Endpoints[key] = []string{} - svcKeyToConfiguredV6Endpoints[key] = []string{} - svcState := &svcState{node: svcHost, selector: selector, v4Endpoints: sets.New[string](), v6Endpoints: sets.New[string](), stale: false} - nodeState.allocations[key] = svcState - c.nodes[svcHost] = nodeState - c.services[key] = svcState + nodeState, ok := c.nodes[svcHost] + if !ok { + nodeState, err = c.nodeStateFor(svcHost) + if err != nil { + klog.Errorf("Can't fetch egress service %s node %s state, err: %v", key, svcHost, err) + continue + } } + + svcKeyToAllV4Endpoints[key] = v4 + svcKeyToAllV6Endpoints[key] = v6 + svcKeyToConfiguredV4Endpoints[key] = []string{} + svcKeyToConfiguredV6Endpoints[key] = []string{} + svcState := &svcState{node: svcHost, selector: selector, v4Endpoints: sets.New[string](), v6Endpoints: sets.New[string](), stale: false} + nodeState.allocations[key] = svcState + c.nodes[svcHost] = nodeState + c.services[key] = svcState } p := func(item *nbdb.LogicalRouterPolicy) bool { @@ -391,9 +448,11 @@ func (c *Controller) repair() error { return false } - err := libovsdbops.DeleteLogicalRouterPoliciesWithPredicate(c.nbClient, ovntypes.OVNClusterRouter, p) + errorList := []error{} + err = libovsdbops.DeleteLogicalRouterPoliciesWithPredicate(c.nbClient, ovntypes.OVNClusterRouter, p) if err != nil { - return fmt.Errorf("error deleting stale logical router policies from router %s: %v", ovntypes.OVNClusterRouter, err) + errorList = append(errorList, + fmt.Errorf("error deleting stale logical router policies from router %s: %v", ovntypes.OVNClusterRouter, err)) } // update caches after transaction completed @@ -405,8 +464,24 @@ func (c *Controller) repair() error { c.services[key].v6Endpoints.Insert(v6ToAdd...) 
} - // now remove any stale egress service labels on nodes + // remove stale host entries from EgressServices without a valid service + for _, es := range egressServices { + key, err := cache.MetaNamespaceKeyFunc(es) + if err != nil { + klog.Errorf("Failed to read EgressService key: %v", err) + continue + } + _, found := c.services[key] + if !found { + err := c.setEgressServiceHost(es.Namespace, es.Name, "") + if err != nil { + errorList = append(errorList, + fmt.Errorf("failed to remove stale host entry from EgressService %s, err: %v", key, err)) + } + } + } + // now remove any stale egress service labels on nodes nodes, _ := c.nodeLister.List(labels.Everything()) svcLabelToNode := map[string]string{} for key, state := range c.services { @@ -417,16 +492,342 @@ func (c *Controller) repair() error { for _, node := range nodes { labelsToRemove := map[string]any{} for labelKey := range node.Labels { - if strings.HasPrefix(labelKey, util.EgressSVCLabelPrefix) && svcLabelToNode[labelKey] != node.Name { + if strings.HasPrefix(labelKey, egressSVCLabelPrefix) && svcLabelToNode[labelKey] != node.Name { labelsToRemove[labelKey] = nil // Patching with a nil value results in the delete of the key } } err := c.patchNodeLabels(node.Name, labelsToRemove) if err != nil { - klog.Errorf("Failed to remove stale labels %v from node %s, err: %v", labelsToRemove, node.Name, err) - continue + errorList = append(errorList, + fmt.Errorf("failed to remove stale labels %v from node %s, err: %v", labelsToRemove, node.Name, err)) + } + } + + return errors.NewAggregate(errorList) +} + +// onEgressServiceAdd queues the EgressService for processing. +func (c *Controller) onEgressServiceAdd(obj interface{}) { + key, err := cache.MetaNamespaceKeyFunc(obj) + if err != nil { + utilruntime.HandleError(fmt.Errorf("couldn't get key for object %+v: %v", obj, err)) + return + } + c.egressServiceQueue.Add(key) +} + +// onEgressServiceUpdate queues the EgressService for processing. +func (c *Controller) onEgressServiceUpdate(oldObj, newObj interface{}) { + oldEQ := oldObj.(*egressserviceapi.EgressService) + newEQ := newObj.(*egressserviceapi.EgressService) + + if oldEQ.ResourceVersion == newEQ.ResourceVersion || + !newEQ.GetDeletionTimestamp().IsZero() { + return + } + + key, err := cache.MetaNamespaceKeyFunc(newObj) + if err == nil { + c.egressServiceQueue.Add(key) + } +} + +// onEgressServiceDelete queues the EgressService for processing. 
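// Illustrative sketch, not part of the patch: the reworked repair() above no longer bails
// out on the first cleanup failure; it collects failures and returns them with
// errors.NewAggregate (k8s.io/apimachinery/pkg/util/errors), which yields nil when the
// list is empty. The cleanup steps below are placeholders.
package main

import (
	"fmt"

	utilerrors "k8s.io/apimachinery/pkg/util/errors"
)

func runCleanup(steps []func() error) error {
	errorList := []error{}
	for _, step := range steps {
		if err := step(); err != nil {
			errorList = append(errorList, err) // remember the failure, keep cleaning
		}
	}
	return utilerrors.NewAggregate(errorList)
}

func main() {
	err := runCleanup([]func() error{
		func() error { return nil },
		func() error { return fmt.Errorf("failed to remove stale label") },
	})
	fmt.Println(err) // failed to remove stale label
}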
+func (c *Controller) onEgressServiceDelete(obj interface{}) { + key, err := cache.MetaNamespaceKeyFunc(obj) + if err != nil { + utilruntime.HandleError(fmt.Errorf("couldn't get key for object %+v: %v", obj, err)) + return + } + c.egressServiceQueue.Add(key) +} + +func (c *Controller) runEgressServiceWorker(wg *sync.WaitGroup) { + for c.processNextEgressServiceWorkItem(wg) { + } +} + +func (c *Controller) processNextEgressServiceWorkItem(wg *sync.WaitGroup) bool { + wg.Add(1) + defer wg.Done() + + key, quit := c.egressServiceQueue.Get() + if quit { + return false + } + + defer c.egressServiceQueue.Done(key) + + err := c.syncEgressService(key.(string)) + if err == nil { + c.egressServiceQueue.Forget(key) + return true + } + + utilruntime.HandleError(fmt.Errorf("%v failed with : %v", key, err)) + + if c.egressServiceQueue.NumRequeues(key) < maxRetries { + c.egressServiceQueue.AddRateLimited(key) + return true + } + + c.egressServiceQueue.Forget(key) + return true +} + +func (c *Controller) syncEgressService(key string) error { + c.Lock() + defer c.Unlock() + + startTime := time.Now() + namespace, name, err := cache.SplitMetaNamespaceKey(key) + if err != nil { + return err + } + klog.Infof("Processing sync for Egress Service %s/%s", namespace, name) + + defer func() { + klog.V(4).Infof("Finished syncing Egress Service %s/%s : %v", namespace, name, time.Since(startTime)) + }() + + es, err := c.egressServiceLister.EgressServices(namespace).Get(name) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + + svc, err := c.serviceLister.Services(namespace).Get(name) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + + state := c.services[key] + if es == nil { + if state == nil { + // The EgressService was deleted and wasn't configured before. + // We delete it from the unallocated service cache just in case. + delete(c.unallocatedServices, key) + return nil + } + // The service is configured but does no longer have the egress service resource, + // meaning we should clear all of its resources. + return c.clearServiceResourcesAndRequeue(key, state) + } + + if svc == nil { + if state == nil { + // The service object was deleted and was not an allocated egress service. + // We delete it from the unallocated service cache just in case. + delete(c.unallocatedServices, key) + return c.setEgressServiceHost(namespace, name, "") + } + // The service was deleted and was an egress service. + // We delete all of its relevant resources to avoid leaving stale configuration. + return c.clearServiceResourcesAndRequeue(key, state) + } + + // At this point both the EgressService resource and the Service != nil + + if state != nil && state.stale { + // The service is marked stale because something failed when trying to delete it. + // We try to delete it again before doing anything else. + return c.clearServiceResourcesAndRequeue(key, state) + } + + if state == nil && len(svc.Status.LoadBalancer.Ingress) == 0 { + // The service wasn't configured before and does not have an ingress ip. + // we don't need to configure it and make sure it does not have a stale host value or unallocated entry. + klog.V(4).Infof("EgressService %s/%s does not have an ingress ip, will not attempt configuring it", namespace, name) + delete(c.unallocatedServices, key) + return c.setEgressServiceHost(namespace, name, "") + } + + if state != nil && len(svc.Status.LoadBalancer.Ingress) == 0 { + // The service has no ingress ips so it is not considered valid anymore. 
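// Illustrative sketch, not part of the patch: the egress service queue above follows the
// standard client-go retry loop - a failed sync is requeued through the fast/slow rate
// limiter until maxRetries is reached, after which the key is forgotten. The values mirror
// the controller's; the failure here is simulated.
package main

import (
	"fmt"
	"time"

	"k8s.io/client-go/util/workqueue"
)

const maxRetries = 10

func main() {
	q := workqueue.NewNamedRateLimitingQueue(
		workqueue.NewItemFastSlowRateLimiter(1*time.Second, 5*time.Second, 5),
		"egressservices")
	q.Add("testns/svc1")

	key, shutdown := q.Get()
	if shutdown {
		return
	}
	defer q.Done(key)

	// Pretend syncEgressService(key) failed and decide whether to retry.
	if q.NumRequeues(key) < maxRetries {
		q.AddRateLimited(key) // retried after the rate limiter's backoff
	} else {
		q.Forget(key) // give up and stop tracking retries for this key
	}
	fmt.Println("requeues so far:", q.NumRequeues(key))
}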
+ klog.V(4).Infof("EgressService %s/%s does not have an ingress ip anymore, removing its existing configuration", namespace, name) + return c.clearServiceResourcesAndRequeue(key, state) + } + + nodeSelector := es.Spec.NodeSelector + v4Endpoints, v6Endpoints, epsNodes, err := c.allEndpointsFor(svc) + if err != nil { + return err + } + + // If the service is ETP=Local we'd like to add an additional constraint to the selector + // that only a node with local eps can be selected. Otherwise new ingress traffic will break. + if len(epsNodes) != 0 && svc.Spec.ExternalTrafficPolicy == corev1.ServiceExternalTrafficPolicyTypeLocal { + matchEpsNodes := metav1.LabelSelectorRequirement{ + Key: "kubernetes.io/hostname", + Operator: metav1.LabelSelectorOpIn, + Values: epsNodes, + } + nodeSelector.MatchExpressions = append(nodeSelector.MatchExpressions, matchEpsNodes) + } + selector, err := metav1.LabelSelectorAsSelector(&nodeSelector) + if err != nil { + return err + } + totalEps := len(v4Endpoints) + len(v6Endpoints) + + // We don't want to select a node for a service without endpoints to not "waste" an + // allocation on a node. + if totalEps == 0 && state == nil { + klog.V(4).Infof("EgressService %s/%s does not have any endpoints, will not attempt configuring it", namespace, name) + c.unallocatedServices[key] = selector + return c.setEgressServiceHost(namespace, name, "") + } + + if totalEps == 0 && state != nil { + klog.V(4).Infof("EgressService %s/%s does not have any endpoints, removing its existing configuration", namespace, name) + c.unallocatedServices[key] = selector + return c.clearServiceResourcesAndRequeue(key, state) + } + + if state == nil { + // The service has a valid EgressService and wasn't configured before. + // This means we need to select a node for it that matches its selector. + c.unallocatedServices[key] = selector + + node, err := c.selectNodeFor(selector) + if err != nil { + return err + } + + // We found a node - update the caches with the new objects. + delete(c.unallocatedServices, key) + newState := &svcState{node: node.name, selector: selector, v4Endpoints: sets.New[string](), v6Endpoints: sets.New[string](), stale: false} + c.services[key] = newState + node.allocations[key] = newState + c.nodes[node.name] = node + state = newState + } + + state.selector = selector + node := c.nodes[state.node] + + if !state.selector.Matches(labels.Set(node.labels)) { + // The node no longer matches the selector. + // We clear its configured resources and requeue it to attempt + // selecting a new node for it. + return c.clearServiceResourcesAndRequeue(key, state) + } + + // At this point the states are valid and we should create the proper logical router policies. + // We reach the desired state by fetching all of the endpoints associated to the service and comparing + // to the known state: + // We need to create policies for endpoints that were fetched but not found in the cache, + // and delete the policies for those which are found in the cache but were not fetched. + // We do it in one transaction, if it succeeds we update the cache to reflect the new state. 
+ + v4ToAdd := v4Endpoints.Difference(state.v4Endpoints).UnsortedList() + v6ToAdd := v6Endpoints.Difference(state.v6Endpoints).UnsortedList() + v4ToRemove := state.v4Endpoints.Difference(v4Endpoints).UnsortedList() + v6ToRemove := state.v6Endpoints.Difference(v6Endpoints).UnsortedList() + + allOps := []libovsdb.Operation{} + createOps, err := c.createLogicalRouterPoliciesOps(key, node.v4MgmtIP.String(), node.v6MgmtIP.String(), v4ToAdd, v6ToAdd) + if err != nil { + return err + } + allOps = append(allOps, createOps...) + + createOps, err = c.addPodIPsToAddressSetOps(createIPAddressNetSlice(v4ToAdd, v6ToAdd)) + if err != nil { + return err + } + allOps = append(allOps, createOps...) + + deleteOps, err := c.deleteLogicalRouterPoliciesOps(key, v4ToRemove, v6ToRemove) + if err != nil { + return err + } + allOps = append(allOps, deleteOps...) + + deleteOps, err = c.deletePodIPsFromAddressSetOps(createIPAddressNetSlice(v4ToRemove, v6ToRemove)) + if err != nil { + return err + } + allOps = append(allOps, deleteOps...) + + if _, err := libovsdbops.TransactAndCheck(c.nbClient, allOps); err != nil { + return fmt.Errorf("failed to update router policies for %s, err: %v", key, err) + } + + state.v4Endpoints.Insert(v4ToAdd...) + state.v4Endpoints.Delete(v4ToRemove...) + state.v6Endpoints.Insert(v6ToAdd...) + state.v6Endpoints.Delete(v6ToRemove...) + + // We configured OVN - the last step is to label the node and set the status + // to mark it as the node holding the service. + + err = c.setEgressServiceHost(namespace, name, state.node) // set the EgressService status, will also override manual changes + if err != nil { + return err + } + + return c.labelNodeForService(namespace, name, node.name) +} + +// Removes all of the resources that belong to the egress service. +// This includes removing the host status value, the logical router policies, +// the label from the node and updating the caches. +// This also requeues the service after cleaning up to be sure we are not +// missing an event after marking it as stale that should be handled. +// This should only be called with the controller locked. 
+func (c *Controller) clearServiceResourcesAndRequeue(key string, svcState *svcState) error { + namespace, name, err := cache.SplitMetaNamespaceKey(key) + if err != nil { + return err + } + + svcState.stale = true + + p := func(item *nbdb.LogicalRouterPolicy) bool { + return item.ExternalIDs[svcExternalIDKey] == key + } + + deleteOps := []libovsdb.Operation{} + deleteOps, err = libovsdbops.DeleteLogicalRouterPolicyWithPredicateOps(c.nbClient, deleteOps, ovntypes.OVNClusterRouter, p) + if err != nil { + return err + } + + if _, err := libovsdbops.TransactAndCheck(c.nbClient, deleteOps); err != nil { + return fmt.Errorf("failed to clean router policies for %s, err: %v", key, err) + } + + if err := c.setEgressServiceHost(namespace, name, ""); err != nil { + return err + } + + nodeState, found := c.nodes[svcState.node] + if found { + if err := c.removeNodeServiceLabel(namespace, name, svcState.node); err != nil { + return fmt.Errorf("failed to remove svc node label for %s, err: %v", svcState.node, err) + } + delete(nodeState.allocations, key) + } + + delete(c.services, key) + c.egressServiceQueue.Add(key) + return nil +} + +func (c *Controller) setEgressServiceHost(namespace, name, host string) error { + err := c.setEgressServiceStatus(namespace, name, host) + if err != nil { + if host != "" { + return err + } + + if !apierrors.IsNotFound(err) { + return err } } + // We return nil if either we got no error or the host="" and we got a "resource missing err". + // This makes stuff easier when cleaning the resources of the service as EgressService deleted + // and EgressService having an empty host means the same in that context. return nil } diff --git a/go-controller/pkg/ovn/controller/egress_services/egress_services_endpointslice.go b/go-controller/pkg/ovn/controller/egress_services/egress_services_endpointslice.go index 4878e516a8..e0401748e2 100644 --- a/go-controller/pkg/ovn/controller/egress_services/egress_services_endpointslice.go +++ b/go-controller/pkg/ovn/controller/egress_services/egress_services_endpointslice.go @@ -67,7 +67,7 @@ func (c *Controller) queueServiceForEndpointSlice(endpointSlice *discovery.Endpo // Once the service label is eventually added, we will get this event // and re-process. if errors.Is(err, services.NoServiceLabelError) { - klog.V(5).Infof(err.Error()) + klog.V(5).Infof("EgressService endpoint slice missing service label: %v", err) } else { utilruntime.HandleError(fmt.Errorf("couldn't get key for EndpointSlice %+v: %v", endpointSlice, err)) } @@ -83,5 +83,5 @@ func (c *Controller) queueServiceForEndpointSlice(endpointSlice *discovery.Endpo return // we queue a service only if it's in the local caches } - c.servicesQueue.Add(key) + c.egressServiceQueue.Add(key) } diff --git a/go-controller/pkg/ovn/controller/egress_services/egress_services_node.go b/go-controller/pkg/ovn/controller/egress_services/egress_services_node.go index 1b0841d9c2..309b08cb1b 100644 --- a/go-controller/pkg/ovn/controller/egress_services/egress_services_node.go +++ b/go-controller/pkg/ovn/controller/egress_services/egress_services_node.go @@ -192,7 +192,7 @@ func (c *Controller) syncNode(key string) error { // Services can't be assigned to a node while it is in draining status. 
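// Illustrative sketch, not part of the patch: setEgressServiceHost above tolerates a
// NotFound error while clearing the host (host == ""), since a deleted EgressService and
// one with an empty host are equivalent for cleanup purposes. updateStatus stands in for
// the real client call and is hypothetical.
package main

import (
	"fmt"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/runtime/schema"
)

func setHost(updateStatus func(host string) error, host string) error {
	if err := updateStatus(host); err != nil {
		if host != "" {
			return err // assigning a real host must never be silently dropped
		}
		if !apierrors.IsNotFound(err) {
			return err
		}
	}
	return nil // succeeded, or the resource is already gone while clearing
}

func main() {
	notFound := apierrors.NewNotFound(schema.GroupResource{Group: "k8s.ovn.org", Resource: "egressservices"}, "svc1")
	fmt.Println(setHost(func(string) error { return notFound }, ""))      // <nil>
	fmt.Println(setHost(func(string) error { return notFound }, "node1")) // not found error
}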
state.draining = true for svcKey, svcState := range state.allocations { - if err := c.clearServiceResources(svcKey, svcState); err != nil { + if err := c.clearServiceResourcesAndRequeue(svcKey, svcState); err != nil { return err } } @@ -223,7 +223,7 @@ func (c *Controller) syncNode(key string) error { // the node's labels can be allocated to it. for svcKey, selector := range c.unallocatedServices { if selector.Matches(labels.Set(nodeLabels)) { - c.servicesQueue.Add(svcKey) + c.egressServiceQueue.Add(svcKey) } } } @@ -237,7 +237,7 @@ func (c *Controller) syncNode(key string) error { // because we don't care about its reachability status until it becomes ready. state.draining = true for svcKey, svcState := range state.allocations { - if err := c.clearServiceResources(svcKey, svcState); err != nil { + if err := c.clearServiceResourcesAndRequeue(svcKey, svcState); err != nil { return err } } @@ -252,7 +252,7 @@ func (c *Controller) syncNode(key string) error { // When it is fully drained and reachable again it will be requeued. state.draining = true for svcKey, svcState := range state.allocations { - if err := c.clearServiceResources(svcKey, svcState); err != nil { + if err := c.clearServiceResourcesAndRequeue(svcKey, svcState); err != nil { return err } } @@ -265,7 +265,7 @@ func (c *Controller) syncNode(key string) error { // If a service's selector no longer matches this node we attempt to reallocate it. for svcKey, svcState := range state.allocations { if !svcState.selector.Matches(labels.Set(n.Labels)) || svcState.stale { - if err := c.clearServiceResources(svcKey, svcState); err != nil { + if err := c.clearServiceResourcesAndRequeue(svcKey, svcState); err != nil { return err } } @@ -285,7 +285,7 @@ func (c *Controller) syncNode(key string) error { // If it does, we queue that service to attempt allocating it to this node. for svcKey, selector := range c.unallocatedServices { if selector.Matches(labels.Set(nodeLabels)) { - c.servicesQueue.Add(svcKey) + c.egressServiceQueue.Add(svcKey) } } @@ -365,7 +365,7 @@ func (c *Controller) removeNodeServiceLabel(namespace, name, node string) error // Returns the 'egress-service.k8s.ovn.org/-' key for the given namespace and name of a service. func (c *Controller) nodeLabelForService(namespace, name string) string { - return fmt.Sprintf("%s/%s-%s", util.EgressSVCLabelPrefix, namespace, name) + return fmt.Sprintf("%s/%s-%s", egressSVCLabelPrefix, namespace, name) } // Patches the node's metadata.labels with the given labels. 
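// Illustrative sketch, not part of the patch: the node label handling above relies on
// JSON merge-patch semantics, where patching metadata.labels with a null value removes
// that key. The helper and label keys below are placeholders; the fake clientset only
// makes the example self-contained.
package main

import (
	"context"
	"encoding/json"
	"fmt"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/kubernetes/fake"
)

func patchNodeLabels(client kubernetes.Interface, node string, labels map[string]any) error {
	patch := struct {
		Metadata map[string]any `json:"metadata"`
	}{
		Metadata: map[string]any{"labels": labels},
	}
	patchData, err := json.Marshal(&patch)
	if err != nil {
		return err
	}
	_, err = client.CoreV1().Nodes().Patch(context.TODO(), node, types.MergePatchType, patchData, metav1.PatchOptions{})
	return err
}

func main() {
	client := fake.NewSimpleClientset(&corev1.Node{ObjectMeta: metav1.ObjectMeta{
		Name:   "node1",
		Labels: map[string]string{"egress-service.k8s.ovn.org/testns-svc1": ""},
	}})
	// nil deletes a label, a string value sets it.
	err := patchNodeLabels(client, "node1", map[string]any{
		"egress-service.k8s.ovn.org/testns-svc1": nil,
		"egress-service.k8s.ovn.org/testns-svc2": "",
	})
	fmt.Println(err)
	n, _ := client.CoreV1().Nodes().Get(context.TODO(), "node1", metav1.GetOptions{})
	fmt.Println(n.Labels) // only egress-service.k8s.ovn.org/testns-svc2 remains
}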
diff --git a/go-controller/pkg/ovn/controller/egress_services/egress_services_service.go b/go-controller/pkg/ovn/controller/egress_services/egress_services_service.go index 20484dfb29..316eb44572 100644 --- a/go-controller/pkg/ovn/controller/egress_services/egress_services_service.go +++ b/go-controller/pkg/ovn/controller/egress_services/egress_services_service.go @@ -1,12 +1,8 @@ package egress_services import ( - "context" - "encoding/json" "fmt" "net" - "sync" - "time" libovsdb "github.com/ovn-org/libovsdb/ovsdb" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdbops" @@ -17,9 +13,7 @@ import ( corev1 "k8s.io/api/core/v1" discovery "k8s.io/api/discovery/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/types" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/tools/cache" @@ -44,17 +38,26 @@ func (c *Controller) onServiceAdd(obj interface{}) { } service := obj.(*corev1.Service) - // We only care about new LoadBalancer services that have the egress-service config annotation + // We only care about new LoadBalancer services that have an EgressService if !util.ServiceTypeHasLoadBalancer(service) || len(service.Status.LoadBalancer.Ingress) == 0 { return } - if !util.HasEgressSVCAnnotation(service) && !util.HasEgressSVCHostAnnotation(service) { + es, err := c.egressServiceLister.EgressServices(service.Namespace).Get(service.Name) + if err != nil && !apierrors.IsNotFound(err) { + // This shouldn't happen, but we queue the service in case we got an unrelated + // error when the EgressService exists + c.egressServiceQueue.Add(key) + return + } + + // There is no EgressService resource for this service so we don't queue it + if es == nil { return } klog.V(4).Infof("Adding egress service %s", key) - c.servicesQueue.Add(key) + c.egressServiceQueue.Add(key) } func (c *Controller) onServiceUpdate(oldObj, newObj interface{}) { @@ -68,344 +71,60 @@ func (c *Controller) onServiceUpdate(oldObj, newObj interface{}) { } // We only care about LoadBalancer service updates that enable/disable egress service functionality - if !util.HasEgressSVCAnnotation(oldService) && !util.HasEgressSVCAnnotation(newService) && - !util.HasEgressSVCHostAnnotation(oldService) && !util.HasEgressSVCHostAnnotation(newService) && - !util.ServiceTypeHasLoadBalancer(oldService) && !util.ServiceTypeHasLoadBalancer(newService) { + if !util.ServiceTypeHasLoadBalancer(oldService) && !util.ServiceTypeHasLoadBalancer(newService) { return } key, err := cache.MetaNamespaceKeyFunc(newObj) - if err == nil { - c.servicesQueue.Add(key) - } -} - -func (c *Controller) onServiceDelete(obj interface{}) { - key, err := cache.MetaNamespaceKeyFunc(obj) if err != nil { - utilruntime.HandleError(fmt.Errorf("couldn't get key for object %+v: %v", obj, err)) + utilruntime.HandleError(fmt.Errorf("couldn't get key for object %+v: %v", newObj, err)) return } - service := obj.(*corev1.Service) - // We only care about deletions of LoadBalancer services with the annotations to cleanup - if !util.ServiceTypeHasLoadBalancer(service) { + es, err := c.egressServiceLister.EgressServices(newService.Namespace).Get(newService.Name) + if err != nil && !apierrors.IsNotFound(err) { + // This shouldn't happen, but we queue the service in case we got an unrelated + // error when the EgressService exists + c.egressServiceQueue.Add(key) return } - if !util.HasEgressSVCAnnotation(service) && 
!util.HasEgressSVCHostAnnotation(service) { + // There is no EgressService resource for this service so we don't queue it + if es == nil { return } - klog.V(4).Infof("Deleting egress service %s", key) - c.servicesQueue.Add(key) -} - -func (c *Controller) runServiceWorker(wg *sync.WaitGroup) { - for c.processNextServiceWorkItem(wg) { - } -} - -func (c *Controller) processNextServiceWorkItem(wg *sync.WaitGroup) bool { - wg.Add(1) - defer wg.Done() - - key, quit := c.servicesQueue.Get() - if quit { - return false - } - - defer c.servicesQueue.Done(key) - - err := c.syncService(key.(string)) - if err == nil { - c.servicesQueue.Forget(key) - return true - } - - utilruntime.HandleError(fmt.Errorf("%v failed with : %v", key, err)) - - if c.servicesQueue.NumRequeues(key) < maxRetries { - c.servicesQueue.AddRateLimited(key) - return true - } - - c.servicesQueue.Forget(key) - return true -} - -func (c *Controller) syncService(key string) error { - c.Lock() - defer c.Unlock() - - startTime := time.Now() - namespace, name, err := cache.SplitMetaNamespaceKey(key) - if err != nil { - return err - } - klog.Infof("Processing sync for Egress Service %s/%s", namespace, name) - - defer func() { - klog.V(4).Infof("Finished syncing Egress Service %s/%s : %v", namespace, name, time.Since(startTime)) - }() - - svc, err := c.serviceLister.Services(namespace).Get(name) - if err != nil && !apierrors.IsNotFound(err) { - return err - } - - state := c.services[key] - if svc == nil && state == nil { - // The service was deleted and was not an allocated egress service. - // We delete it from the unallocated service cache just in case. - delete(c.unallocatedServices, key) - return nil - } - - if svc == nil && state != nil { - // The service was deleted and was an egress service. - // We delete all of its relevant resources to avoid leaving stale configuration. - return c.clearServiceResources(key, state) - } - - if state != nil && state.stale { - // The service is marked stale because something failed when trying to delete it. - // We try to delete it again before doing anything else. - return c.clearServiceResources(key, state) - } - - if state == nil && len(svc.Status.LoadBalancer.Ingress) == 0 { - // The service wasn't configured before and does not have an ingress ip. - // we don't need to configure it and make sure it does not have a stale host annotation or unallocated entry. - delete(c.unallocatedServices, key) - return c.removeServiceNodeAnnotation(namespace, name) - } - - if state != nil && len(svc.Status.LoadBalancer.Ingress) == 0 { - // The service has no ingress ips so it is not considered valid anymore. - return c.clearServiceResources(key, state) - } - - conf, err := util.ParseEgressSVCAnnotation(svc.Annotations) - if err != nil && !util.IsAnnotationNotSetError(err) { - return err - } - - if conf == nil && state == nil { - // The service does not have the config annotation and wasn't configured before. - // We make sure it does not have a stale host annotation or unallocated entry. - delete(c.unallocatedServices, key) - return c.removeServiceNodeAnnotation(namespace, name) - } - - if conf == nil && state != nil { - // The service is configured but does no longer have the config annotation, - // meaning we should clear all of its resources. 
- return c.clearServiceResources(key, state) - } - - // At this point conf != nil - - nodeSelector := &conf.NodeSelector - v4Endpoints, v6Endpoints, epsNodes, err := c.allEndpointsFor(svc) - if err != nil { - return err - } - - // If the service is ETP=Local we'd like to add an additional constraint to the selector - // that only a node with local eps can be selected. Otherwise new ingress traffic will break. - if len(epsNodes) != 0 && svc.Spec.ExternalTrafficPolicy == corev1.ServiceExternalTrafficPolicyTypeLocal { - matchEpsNodes := metav1.LabelSelectorRequirement{ - Key: "kubernetes.io/hostname", - Operator: metav1.LabelSelectorOpIn, - Values: epsNodes, - } - nodeSelector.MatchExpressions = append(nodeSelector.MatchExpressions, matchEpsNodes) - } - - selector, err := metav1.LabelSelectorAsSelector(nodeSelector) - if err != nil { - return err - } - - totalEps := len(v4Endpoints) + len(v6Endpoints) - - // We don't want to select a node for a service without endpoints to not "waste" an - // allocation on a node. - if totalEps == 0 && state == nil { - c.unallocatedServices[key] = selector - return c.removeServiceNodeAnnotation(namespace, name) - } - - if totalEps == 0 && state != nil { - c.unallocatedServices[key] = selector - return c.clearServiceResources(key, state) - } - - if state == nil { - // The service has a valid config annotation and wasn't configured before. - // This means we need to select a node for it that matches its selector. - c.unallocatedServices[key] = selector - - node, err := c.selectNodeFor(selector) - if err != nil { - return err - } - - // We found a node - update the caches with the new objects. - delete(c.unallocatedServices, key) - newState := &svcState{node: node.name, selector: selector, v4Endpoints: sets.New[string](), v6Endpoints: sets.New[string](), stale: false} - c.services[key] = newState - node.allocations[key] = newState - c.nodes[node.name] = node - state = newState - } - - state.selector = selector - node := c.nodes[state.node] - - if !state.selector.Matches(labels.Set(node.labels)) { - // The node no longer matches the selector. - // We clear its configured resources and requeue it to attempt - // selecting a new node for it. - return c.clearServiceResources(key, state) - } - - // At this point the states are valid and we should create the proper logical router policies. - // We reach the desired state by fetching all of the endpoints associated to the service and comparing - // to the known state: - // We need to create policies for endpoints that were fetched but not found in the cache, - // and delete the policies for those which are found in the cache but were not fetched. - // We do it in one transaction, if it succeeds we update the cache to reflect the new state. - - err = c.annotateServiceWithNode(namespace, name, state.node) // annotate the service, will also override manual changes - if err != nil { - return err - } - - v4ToAdd := v4Endpoints.Difference(state.v4Endpoints).UnsortedList() - v6ToAdd := v6Endpoints.Difference(state.v6Endpoints).UnsortedList() - v4ToRemove := state.v4Endpoints.Difference(v4Endpoints).UnsortedList() - v6ToRemove := state.v6Endpoints.Difference(v6Endpoints).UnsortedList() - - allOps := []libovsdb.Operation{} - createOps, err := c.createLogicalRouterPoliciesOps(key, node.v4MgmtIP.String(), node.v6MgmtIP.String(), v4ToAdd, v6ToAdd) - if err != nil { - return err - } - allOps = append(allOps, createOps...) 
- - createOps, err = c.addPodIPsToAddressSetOps(createIPAddressNetSlice(v4ToAdd, v6ToAdd)) - if err != nil { - return err - } - allOps = append(allOps, createOps...) - - deleteOps, err := c.deleteLogicalRouterPoliciesOps(key, v4ToRemove, v6ToRemove) - if err != nil { - return err - } - allOps = append(allOps, deleteOps...) - - createOps, err = c.deletePodIPsFromAddressSetOps(createIPAddressNetSlice(v4ToRemove, v6ToRemove)) - if err != nil { - return err - } - allOps = append(allOps, createOps...) - - if _, err := libovsdbops.TransactAndCheck(c.nbClient, allOps); err != nil { - return fmt.Errorf("failed to update router policies for %s, err: %v", key, err) - } - - state.v4Endpoints.Insert(v4ToAdd...) - state.v4Endpoints.Delete(v4ToRemove...) - state.v6Endpoints.Insert(v6ToAdd...) - state.v6Endpoints.Delete(v6ToRemove...) - - // We configured OVN - the last step is to label the node - // to mark it as the node holding the service. - return c.labelNodeForService(namespace, name, node.name) + c.egressServiceQueue.Add(key) } -// Removes all of the resources that belong to the egress service. -// This includes removing the host annotation, the logical router policies, -// the label from the node and updating the caches. -// This also requeues the service after cleaning up to be sure we are not -// missing an event after marking it as stale that should be handled. -// This should only be called with the controller locked. -func (c *Controller) clearServiceResources(key string, svcState *svcState) error { - namespace, name, err := cache.SplitMetaNamespaceKey(key) - if err != nil { - return err - } - - svcState.stale = true - if err := c.removeServiceNodeAnnotation(namespace, name); err != nil { - return err - } - - p := func(item *nbdb.LogicalRouterPolicy) bool { - return item.ExternalIDs[svcExternalIDKey] == key - } - - deleteOps := []libovsdb.Operation{} - deleteOps, err = libovsdbops.DeleteLogicalRouterPolicyWithPredicateOps(c.nbClient, deleteOps, ovntypes.OVNClusterRouter, p) +func (c *Controller) onServiceDelete(obj interface{}) { + key, err := cache.MetaNamespaceKeyFunc(obj) if err != nil { - return err - } - - if _, err := libovsdbops.TransactAndCheck(c.nbClient, deleteOps); err != nil { - return fmt.Errorf("failed to clean router policies for %s, err: %v", key, err) - } - - nodeState, found := c.nodes[svcState.node] - if found { - if err := c.removeNodeServiceLabel(namespace, name, svcState.node); err != nil { - return fmt.Errorf("failed to remove svc node label for %s, err: %v", svcState.node, err) - } - delete(nodeState.allocations, key) + utilruntime.HandleError(fmt.Errorf("couldn't get key for object %+v: %v", obj, err)) + return } - delete(c.services, key) - c.servicesQueue.Add(key) - return nil -} - -// Annotates the given service with the 'k8s.ovn.org/egress-service-host=' annotation -func (c *Controller) annotateServiceWithNode(namespace, name string, node string) error { - annotations := map[string]any{util.EgressSVCHostAnnotation: node} - return c.patchServiceAnnotations(namespace, name, annotations) -} - -// Removes the 'k8s.ovn.org/egress-service-host=' annotation from the given service. -func (c *Controller) removeServiceNodeAnnotation(namespace, name string) error { - annotations := map[string]any{util.EgressSVCHostAnnotation: nil} // Patching with a nil value results in the delete of the key - return c.patchServiceAnnotations(namespace, name, annotations) -} - -// Patches the service's metadata.annotations with the given annotations. 
-func (c *Controller) patchServiceAnnotations(namespace, name string, annotations map[string]any) error { - patch := struct { - Metadata map[string]any `json:"metadata"` - }{ - Metadata: map[string]any{ - "annotations": annotations, - }, + service := obj.(*corev1.Service) + // We only care about deletions of LoadBalancer services + if !util.ServiceTypeHasLoadBalancer(service) { + return } - klog.V(4).Infof("Setting annotations %v on service %s/%s", annotations, namespace, name) - patchData, err := json.Marshal(&patch) - if err != nil { - klog.Errorf("Error in setting annotations on service %s/%s: %v", namespace, name, err) - return err + klog.V(4).Infof("Deleting egress service %s", key) + es, err := c.egressServiceLister.EgressServices(service.Namespace).Get(service.Name) + if err != nil && !apierrors.IsNotFound(err) { + // This shouldn't happen, but we queue the service in case we got an unrelated + // error when the EgressService exists + c.egressServiceQueue.Add(key) + return } - _, err = c.client.CoreV1().Services(namespace).Patch(context.TODO(), name, types.MergePatchType, patchData, metav1.PatchOptions{}) - if err != nil && !apierrors.IsNotFound(err) { - return err + // There is no EgressService resource for this service so we don't queue it + if es == nil { + return } - return nil + c.egressServiceQueue.Add(key) } // Returns all of the non-host endpoints for the given service grouped by IPv4/IPv6. diff --git a/go-controller/pkg/ovn/default_network_controller.go b/go-controller/pkg/ovn/default_network_controller.go index 19fbaad6c1..1ed857d6b8 100644 --- a/go-controller/pkg/ovn/default_network_controller.go +++ b/go-controller/pkg/ovn/default_network_controller.go @@ -162,11 +162,6 @@ func newDefaultNetworkControllerCommon(cnci *CommonNetworkControllerInfo, if err != nil { return nil, fmt.Errorf("unable to create new service controller while creating new default network controller: %w", err) } - egressSvcController, err := newEgressServiceController(cnci.client, cnci.nbClient, addressSetFactory, svcFactory, - defaultStopChan, DefaultNetworkControllerName) - if err != nil { - return nil, fmt.Errorf("unable to create new egress service controller while creating new default network controller: %w", err) - } var zoneICHandler *zoneic.ZoneInterconnectHandler var zoneChassisHandler *zoneic.ZoneChassisHandler @@ -214,7 +209,6 @@ func newDefaultNetworkControllerCommon(cnci *CommonNetworkControllerInfo, routerLoadBalancerGroupUUID: "", svcController: svcController, svcFactory: svcFactory, - egressSvcController: egressSvcController, zoneICHandler: zoneICHandler, zoneChassisHandler: zoneChassisHandler, } @@ -508,11 +502,18 @@ func (oc *DefaultNetworkController) Run(ctx context.Context) error { }() } - oc.wg.Add(1) - go func() { - defer oc.wg.Done() - oc.egressSvcController.Run(1) - }() + if config.OVNKubernetesFeature.EnableEgressService { + c, err := oc.InitEgressServiceController() + if err != nil { + return fmt.Errorf("unable to create new egress service controller while creating new default network controller: %w", err) + } + oc.egressSvcController = c + oc.wg.Add(1) + go func() { + defer oc.wg.Done() + oc.egressSvcController.Run(1) + }() + } end := time.Since(start) klog.Infof("Completing all the Watchers took %v", end) diff --git a/go-controller/pkg/ovn/egressservices_test.go b/go-controller/pkg/ovn/egressservices_test.go index d3ce7a4532..dffda0504e 100644 --- a/go-controller/pkg/ovn/egressservices_test.go +++ b/go-controller/pkg/ovn/egressservices_test.go @@ -11,13 +11,13 @@ import ( 
"github.com/onsi/gomega" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + egressserviceapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" egresssvc "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/egress_services" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/healthcheck" libovsdbtest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/libovsdb" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" "github.com/urfave/cli/v2" @@ -33,17 +33,18 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { fakeOVN *FakeOVN ) const ( - node1Name string = "node1" - node1IPv4 string = "100.100.100.0" - node1IPv6 string = "fc00:f853:ccd:e793::1" - node1IPv4Subnet string = "10.128.1.0/24" - node1IPv6Subnet string = "fe00:10:128:1::/64" - node2Name string = "node2" - node2IPv4 string = "200.200.200.0" - node2IPv6 string = "fc00:f853:ccd:e793::2" - node2IPv4Subnet string = "10.128.2.0/24" - node2IPv6Subnet string = "fe00:10:128:2::/64" - controllerName = DefaultNetworkControllerName + node1Name string = "node1" + node1IPv4 string = "100.100.100.0" + node1IPv6 string = "fc00:f853:ccd:e793::1" + node1IPv4Subnet string = "10.128.1.0/24" + node1IPv6Subnet string = "fe00:10:128:1::/64" + node2Name string = "node2" + node2IPv4 string = "200.200.200.0" + node2IPv6 string = "fc00:f853:ccd:e793::2" + node2IPv4Subnet string = "10.128.2.0/24" + node2IPv6Subnet string = "fe00:10:128:2::/64" + controllerName = DefaultNetworkControllerName + egressSVCLabelPrefix string = "egress-service.k8s.ovn.org" ) ginkgo.BeforeEach(func() { @@ -51,6 +52,7 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { config.PrepareTestConfig() // disabling EgressIP to be sure we're creating the no reroute policies ourselves config.OVNKubernetesFeature.EnableEgressIP = false + config.OVNKubernetesFeature.EnableEgressService = true _, cidr4, _ := net.ParseCIDR("10.128.0.0/16") _, cidr6, _ := net.ParseCIDR("fe00::/16") config.Default.ClusterSubnets = []config.CIDRNetworkEntry{{cidr4, 24}, {cidr6, 64}} @@ -78,18 +80,45 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { ObjectMeta: metav1.ObjectMeta{Name: "nolongeregresssvc", Namespace: "testns"}, } - svc1 := svcFor("testns", "svc1", map[string]string{ - util.EgressSVCAnnotation: "{}", - util.EgressSVCHostAnnotation: node1Name, - }) - svc2 := svcFor("testns", "svc2", map[string]string{ - util.EgressSVCAnnotation: "{}", - util.EgressSVCHostAnnotation: node2Name, - }) - svc3 := svcFor("testns", "svc3", map[string]string{ - util.EgressSVCAnnotation: "{\"nodeSelector\":{\"matchLabels\":{\"kubernetes.io/hostname\": \"node2\"}}}", - util.EgressSVCHostAnnotation: node2Name, - }) + esvc1 := egressserviceapi.EgressService{ + ObjectMeta: metav1.ObjectMeta{ + Name: "svc1", + Namespace: "testns", + }, + Status: egressserviceapi.EgressServiceStatus{ + Host: node1Name, + }, + } + svc1 := lbSvcFor("testns", "svc1") + + esvc2 := egressserviceapi.EgressService{ + ObjectMeta: metav1.ObjectMeta{ + Name: "svc2", + Namespace: "testns", + }, + Status: egressserviceapi.EgressServiceStatus{ + Host: node2Name, + }, + } + svc2 := lbSvcFor("testns", "svc2") + + esvc3 := egressserviceapi.EgressService{ + ObjectMeta: metav1.ObjectMeta{ + Name: "svc3", + Namespace: "testns", + }, + Spec: 
egressserviceapi.EgressServiceSpec{ + NodeSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "kubernetes.io/hostname": node2.Name, + }, + }, + }, + Status: egressserviceapi.EgressServiceStatus{ + Host: node2Name, + }, + } + svc3 := lbSvcFor("testns", "svc3") svc3.Spec.ExternalTrafficPolicy = v1.ServiceExternalTrafficPolicyTypeLocal svc1EpSlice := discovery.EndpointSlice{ @@ -147,7 +176,7 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { } staleLRP2 := &nbdb.LogicalRouterPolicy{ - ExternalIDs: map[string]string{"EgressSVC": "testns/nolongeregresssvc"}, // configuration annotation removed + ExternalIDs: map[string]string{"EgressSVC": "testns/nolongeregresssvc"}, // corresponding EgressService removed Priority: types.EgressSVCReroutePriority, UUID: "staleLRP2-UUID", Match: "ip4.src == 10.10.10.10", @@ -245,10 +274,16 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { svc3EpSlice, }, }, + &egressserviceapi.EgressServiceList{ + Items: []egressserviceapi.EgressService{ + esvc1, + esvc2, + esvc3, + }, + }, ) fakeOVN.InitAndRunEgressSVCController() - clusterRouter.Policies = []string{"toKeepLRP1-UUID", "toKeepLRP2-UUID"} expectedDatabaseState := []libovsdbtest.TestData{ toKeepLRP1, @@ -276,28 +311,41 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { node1.Labels = map[string]string{ "unrelated-label": "", - fmt.Sprintf("%s/deleted-service1", util.EgressSVCLabelPrefix): "", - fmt.Sprintf("%s/deleted-service2", util.EgressSVCLabelPrefix): "", - fmt.Sprintf("%s/testns-svc1", util.EgressSVCLabelPrefix): "", - fmt.Sprintf("%s/testns-svc2", util.EgressSVCLabelPrefix): "", + fmt.Sprintf("%s/deleted-service1", egressSVCLabelPrefix): "", + fmt.Sprintf("%s/deleted-service2", egressSVCLabelPrefix): "", + fmt.Sprintf("%s/testns-svc1", egressSVCLabelPrefix): "", + fmt.Sprintf("%s/testns-svc2", egressSVCLabelPrefix): "", } node2.Labels = map[string]string{ "unrelated-label": "", - fmt.Sprintf("%s/deleted-service3", util.EgressSVCLabelPrefix): "", - fmt.Sprintf("%s/deleted-service4", util.EgressSVCLabelPrefix): "", - fmt.Sprintf("%s/testns-svc1", util.EgressSVCLabelPrefix): "", - fmt.Sprintf("%s/testns-svc2", util.EgressSVCLabelPrefix): "", + fmt.Sprintf("%s/deleted-service3", egressSVCLabelPrefix): "", + fmt.Sprintf("%s/deleted-service4", egressSVCLabelPrefix): "", + fmt.Sprintf("%s/testns-svc1", egressSVCLabelPrefix): "", + fmt.Sprintf("%s/testns-svc2", egressSVCLabelPrefix): "", } - svc1 := svcFor("testns", "svc1", map[string]string{ - util.EgressSVCAnnotation: "{}", - util.EgressSVCHostAnnotation: node1Name, - }) - svc2 := svcFor("testns", "svc2", map[string]string{ - util.EgressSVCAnnotation: "{}", - util.EgressSVCHostAnnotation: node2Name, - }) + esvc1 := egressserviceapi.EgressService{ + ObjectMeta: metav1.ObjectMeta{ + Name: "svc1", + Namespace: "testns", + }, + Status: egressserviceapi.EgressServiceStatus{ + Host: node1Name, + }, + } + svc1 := lbSvcFor("testns", "svc1") + + esvc2 := egressserviceapi.EgressService{ + ObjectMeta: metav1.ObjectMeta{ + Name: "svc2", + Namespace: "testns", + }, + Status: egressserviceapi.EgressServiceStatus{ + Host: node2Name, + }, + } + svc2 := lbSvcFor("testns", "svc2") svc1EpSlice := discovery.EndpointSlice{ ObjectMeta: metav1.ObjectMeta{ @@ -362,6 +410,12 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { svc2EpSlice, }, }, + &egressserviceapi.EgressServiceList{ + Items: []egressserviceapi.EgressService{ + esvc1, + esvc2, + }, + }, ) fakeOVN.InitAndRunEgressSVCController() @@ 
-369,7 +423,7 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { gomega.Eventually(func() error { expectedLabels := map[string]string{ "unrelated-label": "", - fmt.Sprintf("%s/testns-svc1", util.EgressSVCLabelPrefix): "", + fmt.Sprintf("%s/testns-svc1", egressSVCLabelPrefix): "", } node1, err := fakeOVN.fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), node1Name, metav1.GetOptions{}) @@ -387,7 +441,7 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { gomega.Eventually(func() error { expectedLabels := map[string]string{ "unrelated-label": "", - fmt.Sprintf("%s/testns-svc2", util.EgressSVCLabelPrefix): "", + fmt.Sprintf("%s/testns-svc2", egressSVCLabelPrefix): "", } node2, err := fakeOVN.fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), node2Name, metav1.GetOptions{}) @@ -408,10 +462,75 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { err := app.Run([]string{app.Name}) gomega.Expect(err).ToNot(gomega.HaveOccurred()) }) + + ginkgo.It("should delete stale status from EgressServices", func() { + app.Action = func(ctx *cli.Context) error { + namespaceT := *newNamespace("testns") + node1 := nodeFor(node1Name, node1IPv4, node1IPv6, node1IPv4Subnet, node1IPv6Subnet) + + esvc1 := egressserviceapi.EgressService{ + ObjectMeta: metav1.ObjectMeta{ + Name: "svc1", + Namespace: "testns", + }, + Status: egressserviceapi.EgressServiceStatus{ + Host: node1Name, + }, + } + + dbSetup := libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: types.OVNClusterRouter, + UUID: types.OVNClusterRouter + "-UUID", + }, + }, + } + + fakeOVN.startWithDBSetup(dbSetup, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, + }, + }, + &v1.NodeList{ + Items: []v1.Node{ + *node1, + }, + }, + &egressserviceapi.EgressServiceList{ + Items: []egressserviceapi.EgressService{ + esvc1, + }, + }, + ) + + fakeOVN.InitAndRunEgressSVCController() + + gomega.Eventually(func() error { + es, err := fakeOVN.fakeClient.EgressServiceClient.K8sV1().EgressServices("testns").Get(context.TODO(), esvc1.Name, metav1.GetOptions{}) + if err != nil { + return err + } + + if es.Status.Host != "" { + return fmt.Errorf("expected svc1's host value %s to be empty", es.Status.Host) + } + + return nil + return nil + }).ShouldNot(gomega.HaveOccurred()) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + }) }) ginkgo.Context("on services changes", func() { - ginkgo.It("should create/update/delete service host annotations", func() { + ginkgo.It("should create/update/delete EgressService host", func() { app.Action = func(ctx *cli.Context) error { namespaceT := *newNamespace("testns") node1 := nodeFor(node1Name, node1IPv4, node1IPv6, node1IPv4Subnet, node1IPv6Subnet) @@ -431,9 +550,20 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { } ginkgo.By("creating a service that will be allocated on the first node") - svc1 := svcFor("testns", "svc1", map[string]string{ - util.EgressSVCAnnotation: "{\"nodeSelector\":{\"matchLabels\":{\"firstName\": \"Albus\"}}}", - }) + esvc1 := egressserviceapi.EgressService{ + ObjectMeta: metav1.ObjectMeta{ + Name: "svc1", + Namespace: "testns", + }, + Spec: egressserviceapi.EgressServiceSpec{ + NodeSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "firstName": "Albus", + }, + }, + }, + } + svc1 := lbSvcFor("testns", "svc1") svc1EpSlice := discovery.EndpointSlice{ ObjectMeta: metav1.ObjectMeta{ Name: "svc1-epslice", @@ -487,112 
+617,87 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { svc2EpSlice, }, }, + &egressserviceapi.EgressServiceList{ + Items: []egressserviceapi.EgressService{ + esvc1, + }, + }, ) fakeOVN.InitAndRunEgressSVCController() gomega.Eventually(func() error { - svc, err := fakeOVN.fakeClient.KubeClient.CoreV1().Services("testns").Get(context.TODO(), svc1.Name, metav1.GetOptions{}) + es, err := fakeOVN.fakeClient.EgressServiceClient.K8sV1().EgressServices("testns").Get(context.TODO(), svc1.Name, metav1.GetOptions{}) if err != nil { return err } - svcHost, err := util.GetEgressSVCHost(svc) - if err != nil { - return err - } - - if svcHost != node1.Name { - return fmt.Errorf("expected svc1's host annotation value %s to be node1", svcHost) + if es.Status.Host != node1.Name { + return fmt.Errorf("expected svc1's host value %s to be node1", es.Status.Host) } return nil }).ShouldNot(gomega.HaveOccurred()) - ginkgo.By("creating a second service without any egress service config annotation") - s2 := svcFor("testns", "svc2", map[string]string{}) + ginkgo.By("creating a second service without any EgressService") + s2 := lbSvcFor("testns", "svc2") svc2 := &s2 svc2, err := fakeOVN.fakeClient.KubeClient.CoreV1().Services("testns").Create(context.TODO(), svc2, metav1.CreateOptions{}) gomega.Expect(err).ToNot(gomega.HaveOccurred()) - gomega.Eventually(func() error { - svc, err := fakeOVN.fakeClient.KubeClient.CoreV1().Services("testns").Get(context.TODO(), svc2.Name, metav1.GetOptions{}) - if err != nil { - return err - } - - val, ok := svc.Annotations[util.EgressSVCHostAnnotation] - if ok { - return fmt.Errorf("expected svc2's egress host annotation to be empty, got: %s", val) - } - return nil - }).ShouldNot(gomega.HaveOccurred()) - - ginkgo.By("updating the second service with a config that matches the second node the host annotation will be created") - svc2.Annotations = map[string]string{util.EgressSVCAnnotation: "{\"nodeSelector\":{\"matchLabels\":{\"firstName\": \"Severus\"}}}"} - svc2.ResourceVersion = "2" - _, err = fakeOVN.fakeClient.KubeClient.CoreV1().Services("testns").Update(context.TODO(), svc2, metav1.UpdateOptions{}) + ginkgo.By("creating an EgressService for the second service with a config that matches the second node its status will be updated") + esvc2 := &egressserviceapi.EgressService{ + ObjectMeta: metav1.ObjectMeta{ + Name: "svc2", + Namespace: "testns", + }, + Spec: egressserviceapi.EgressServiceSpec{ + NodeSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "firstName": "Severus", + }, + }, + }, + } + _, err = fakeOVN.fakeClient.EgressServiceClient.K8sV1().EgressServices("testns").Create(context.TODO(), esvc2, metav1.CreateOptions{}) gomega.Expect(err).ToNot(gomega.HaveOccurred()) gomega.Eventually(func() error { - svc, err := fakeOVN.fakeClient.KubeClient.CoreV1().Services("testns").Get(context.TODO(), svc2.Name, metav1.GetOptions{}) - if err != nil { - return err - } - - svcHost, err := util.GetEgressSVCHost(svc) + es, err := fakeOVN.fakeClient.EgressServiceClient.K8sV1().EgressServices("testns").Get(context.TODO(), esvc2.Name, metav1.GetOptions{}) if err != nil { return err } - if svcHost != node2.Name { - return fmt.Errorf("expected svc2's host annotation value %s to be node2", svcHost) + if es.Status.Host != node2.Name { + return fmt.Errorf("expected svc2's host value %s to be node2", es.Status.Host) } return nil }).ShouldNot(gomega.HaveOccurred()) - ginkgo.By("updating the second service's config to match the first node instead of the second its 
host annotation will be updated") - svc2.Annotations = map[string]string{util.EgressSVCAnnotation: "{\"nodeSelector\":{\"matchLabels\":{\"firstName\": \"Albus\"}}}"} - svc2.ResourceVersion = "3" - _, err = fakeOVN.fakeClient.KubeClient.CoreV1().Services("testns").Update(context.TODO(), svc2, metav1.UpdateOptions{}) + ginkgo.By("updating the second service's config to match the first node instead of the second its status will be updated") + esvc2.Spec.NodeSelector = metav1.LabelSelector{ + MatchLabels: map[string]string{ + "firstName": "Albus", + }, + } + esvc2.ResourceVersion = "2" + _, err = fakeOVN.fakeClient.EgressServiceClient.K8sV1().EgressServices("testns").Update(context.TODO(), esvc2, metav1.UpdateOptions{}) gomega.Expect(err).ToNot(gomega.HaveOccurred()) - gomega.Eventually(func() error { - svc, err := fakeOVN.fakeClient.KubeClient.CoreV1().Services("testns").Get(context.TODO(), svc2.Name, metav1.GetOptions{}) - if err != nil { - return err - } - - svcHost, err := util.GetEgressSVCHost(svc) - if err != nil { - return err - } - - if svcHost != node1.Name { - return fmt.Errorf("expected svc2's host annotation value %s to be node1", svcHost) - } - - return nil - }).ShouldNot(gomega.HaveOccurred()) - ginkgo.By("removing the config annotation from the second service its host annotation will be deleted") - delete(svc2.Annotations, util.EgressSVCAnnotation) - svc2.ResourceVersion = "4" - _, err = fakeOVN.fakeClient.KubeClient.CoreV1().Services("testns").Update(context.TODO(), svc2, metav1.UpdateOptions{}) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) gomega.Eventually(func() error { - svc, err := fakeOVN.fakeClient.KubeClient.CoreV1().Services("testns").Get(context.TODO(), svc2.Name, metav1.GetOptions{}) + es, err := fakeOVN.fakeClient.EgressServiceClient.K8sV1().EgressServices("testns").Get(context.TODO(), esvc2.Name, metav1.GetOptions{}) if err != nil { return err } - val, ok := svc.Annotations[util.EgressSVCHostAnnotation] - if ok { - return fmt.Errorf("expected svc2's egress host annotation to be empty, got: %s", val) + if es.Status.Host != node1.Name { + return fmt.Errorf("expected svc2's host value %s to be node1", es.Status.Host) } return nil }).ShouldNot(gomega.HaveOccurred()) + return nil } err := app.Run([]string{app.Name}) @@ -620,9 +725,20 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { } ginkgo.By("creating a service with v4 and v6 endpoints it will be allocated on the first node") - svc1 := svcFor("testns", "svc1", map[string]string{ - util.EgressSVCAnnotation: "{\"nodeSelector\":{\"matchLabels\":{\"house\": \"Gryffindor\"}}}", - }) + esvc1 := egressserviceapi.EgressService{ + ObjectMeta: metav1.ObjectMeta{ + Name: "svc1", + Namespace: "testns", + }, + Spec: egressserviceapi.EgressServiceSpec{ + NodeSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "house": "Gryffindor", + }, + }, + }, + } + svc1 := lbSvcFor("testns", "svc1") v4EpSlice := discovery.EndpointSlice{ ObjectMeta: metav1.ObjectMeta{ @@ -685,6 +801,11 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { v6EpSlice, }, }, + &egressserviceapi.EgressServiceList{ + Items: []egressserviceapi.EgressService{ + esvc1, + }, + }, ) fakeOVN.InitAndRunEgressSVCController() @@ -708,11 +829,14 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { } gomega.Eventually(fakeOVN.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - ginkgo.By("updating the service's config to match the second node instead of the first its lrps' nexthop will be 
updated") - - svc1.Annotations = map[string]string{util.EgressSVCAnnotation: "{\"nodeSelector\":{\"matchLabels\":{\"house\": \"Slytherin\"}}}"} - svc1.ResourceVersion = "2" - _, err := fakeOVN.fakeClient.KubeClient.CoreV1().Services("testns").Update(context.TODO(), &svc1, metav1.UpdateOptions{}) + ginkgo.By("updating the EgressService's config to match the second node instead of the first its lrps' nexthop will be updated") + esvc1.Spec.NodeSelector = metav1.LabelSelector{ + MatchLabels: map[string]string{ + "house": "Slytherin", + }, + } + esvc1.ResourceVersion = "2" + _, err := fakeOVN.fakeClient.EgressServiceClient.K8sV1().EgressServices("testns").Update(context.TODO(), &esvc1, metav1.UpdateOptions{}) gomega.Expect(err).ToNot(gomega.HaveOccurred()) v4lrp1.Nexthops[0] = "10.128.2.2" @@ -735,11 +859,10 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { } gomega.Eventually(fakeOVN.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - ginkgo.By("removing the config annotation from the service it lrps will be removed") - delete(svc1.Annotations, util.EgressSVCAnnotation) - svc1.ResourceVersion = "3" - _, err = fakeOVN.fakeClient.KubeClient.CoreV1().Services("testns").Update(context.TODO(), &svc1, metav1.UpdateOptions{}) + ginkgo.By("removing the EgressService its lrps will be removed") + err = fakeOVN.fakeClient.EgressServiceClient.K8sV1().EgressServices("testns").Delete(context.TODO(), esvc1.Name, metav1.DeleteOptions{}) gomega.Expect(err).ToNot(gomega.HaveOccurred()) + clusterRouter.Policies = []string{} expectedDatabaseState = []libovsdbtest.TestData{clusterRouter} for _, lrp := range getDefaultNoReroutePolicies(controllerName) { @@ -773,10 +896,21 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { }, } - ginkgo.By("creating a service that will be allocated on the first node it will be labeled") - svc1 := svcFor("testns", "svc1", map[string]string{ - util.EgressSVCAnnotation: "{\"nodeSelector\":{\"matchLabels\":{\"animal\": \"FlyingBison\"}}}", - }) + ginkgo.By("creating an egress service that will be allocated on the first node it will be labeled") + esvc1 := egressserviceapi.EgressService{ + ObjectMeta: metav1.ObjectMeta{ + Name: "svc1", + Namespace: "testns", + }, + Spec: egressserviceapi.EgressServiceSpec{ + NodeSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "animal": "FlyingBison", + }, + }, + }, + } + svc1 := lbSvcFor("testns", "svc1") svc1EpSlice := discovery.EndpointSlice{ ObjectMeta: metav1.ObjectMeta{ Name: "svc1-epslice", @@ -814,6 +948,11 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { svc1EpSlice, }, }, + &egressserviceapi.EgressServiceList{ + Items: []egressserviceapi.EgressService{ + esvc1, + }, + }, ) fakeOVN.InitAndRunEgressSVCController() @@ -821,7 +960,7 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { gomega.Eventually(func() error { node1ExpectedLabels := map[string]string{ "animal": "FlyingBison", - fmt.Sprintf("%s/testns-svc1", util.EgressSVCLabelPrefix): "", + fmt.Sprintf("%s/testns-svc1", egressSVCLabelPrefix): "", } node1, err := fakeOVN.fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), node1Name, metav1.GetOptions{}) @@ -850,9 +989,13 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { }).ShouldNot(gomega.HaveOccurred()) ginkgo.By("updating the service to be allocated on the second node its label will move to the second node") - svc1.Annotations = map[string]string{util.EgressSVCAnnotation: 
"{\"nodeSelector\":{\"matchLabels\":{\"animal\": \"Lemur\"}}}"} - svc1.ResourceVersion = "2" - _, err := fakeOVN.fakeClient.KubeClient.CoreV1().Services("testns").Update(context.TODO(), &svc1, metav1.UpdateOptions{}) + esvc1.Spec.NodeSelector = metav1.LabelSelector{ + MatchLabels: map[string]string{ + "animal": "Lemur", + }, + } + esvc1.ResourceVersion = "2" + _, err := fakeOVN.fakeClient.EgressServiceClient.K8sV1().EgressServices("testns").Update(context.TODO(), &esvc1, metav1.UpdateOptions{}) gomega.Expect(err).ToNot(gomega.HaveOccurred()) gomega.Eventually(func() error { @@ -871,7 +1014,7 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { node2ExpectedLabels := map[string]string{ "animal": "Lemur", - fmt.Sprintf("%s/testns-svc1", util.EgressSVCLabelPrefix): "", + fmt.Sprintf("%s/testns-svc1", egressSVCLabelPrefix): "", } node2, err := fakeOVN.fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), node2Name, metav1.GetOptions{}) @@ -886,8 +1029,8 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { return nil }).ShouldNot(gomega.HaveOccurred()) - ginkgo.By("deleting the service both nodes will not have the label") - err = fakeOVN.fakeClient.KubeClient.CoreV1().Services("testns").Delete(context.TODO(), svc1.Name, metav1.DeleteOptions{}) + ginkgo.By("deleting the EgressService both nodes will not have the label") + err = fakeOVN.fakeClient.EgressServiceClient.K8sV1().EgressServices("testns").Delete(context.TODO(), esvc1.Name, metav1.DeleteOptions{}) gomega.Expect(err).ToNot(gomega.HaveOccurred()) gomega.Eventually(func() error { @@ -942,10 +1085,21 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { }, } - svc1 := svcFor("testns", "svc1", map[string]string{ - // ":", "&" not allowed in labels - util.EgressSVCAnnotation: "{\"nodeSelector\":{\"matchLabels\":{\"a:b\": \"c&\"}}}", - }) + // ":", "&" not allowed in labels + esvc1 := egressserviceapi.EgressService{ + ObjectMeta: metav1.ObjectMeta{ + Name: "svc1", + Namespace: "testns", + }, + Spec: egressserviceapi.EgressServiceSpec{ + NodeSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "a:b": "c&", + }, + }, + }, + } + svc1 := lbSvcFor("testns", "svc1") svc1EpSlice := discovery.EndpointSlice{ ObjectMeta: metav1.ObjectMeta{ Name: "svc1-epslice", @@ -982,19 +1136,23 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { svc1EpSlice, }, }, + &egressserviceapi.EgressServiceList{ + Items: []egressserviceapi.EgressService{ + esvc1, + }, + }, ) fakeOVN.InitAndRunEgressSVCController() gomega.Consistently(func() error { - svc, err := fakeOVN.fakeClient.KubeClient.CoreV1().Services("testns").Get(context.TODO(), svc1.Name, metav1.GetOptions{}) + es, err := fakeOVN.fakeClient.EgressServiceClient.K8sV1().EgressServices("testns").Get(context.TODO(), svc1.Name, metav1.GetOptions{}) if err != nil { return err } - val, ok := svc.Annotations[util.EgressSVCHostAnnotation] - if ok { - return fmt.Errorf("expected svc1 to not have a host annotation, got a value of %v", val) + if es.Status.Host != "" { + return fmt.Errorf("expected svc1 to not have a host, got a value of %v", es.Status.Host) } node1, err := fakeOVN.fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), node1Name, metav1.GetOptions{}) @@ -1002,7 +1160,7 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { return err } - _, ok = node1.Labels[fmt.Sprintf("%s/testns-svc1", util.EgressSVCLabelPrefix)] + _, ok := node1.Labels[fmt.Sprintf("%s/testns-svc1", egressSVCLabelPrefix)] if ok { return 
fmt.Errorf("expected node1 to not have the egress service label, got %v", node1.Labels) @@ -1037,9 +1195,13 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { } ginkgo.By("creating a service with v4 endpoints that will be allocated on the node") - svc1 := svcFor("testns", "svc1", map[string]string{ - util.EgressSVCAnnotation: "{}", - }) + esvc1 := egressserviceapi.EgressService{ + ObjectMeta: metav1.ObjectMeta{ + Name: "svc1", + Namespace: "testns", + }, + } + svc1 := lbSvcFor("testns", "svc1") v4EpSlice := &discovery.EndpointSlice{ ObjectMeta: metav1.ObjectMeta{ @@ -1102,6 +1264,11 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { *v4EpSlice, }, }, + &egressserviceapi.EgressServiceList{ + Items: []egressserviceapi.EgressService{ + esvc1, + }, + }, ) fakeOVN.InitAndRunEgressSVCController() @@ -1224,9 +1391,20 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { } ginkgo.By("creating a service with a selector matching a node without local eps lrps should not be created") - svc1 := svcFor("testns", "svc1", map[string]string{ - util.EgressSVCAnnotation: "{\"nodeSelector\":{\"matchLabels\":{\"square\": \"pants\"}}}", - }) + esvc1 := egressserviceapi.EgressService{ + ObjectMeta: metav1.ObjectMeta{ + Name: "svc1", + Namespace: "testns", + }, + Spec: egressserviceapi.EgressServiceSpec{ + NodeSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "square": "pants", + }, + }, + }, + } + svc1 := lbSvcFor("testns", "svc1") svc1.Spec.ExternalTrafficPolicy = v1.ServiceExternalTrafficPolicyTypeLocal v4EpSlice := &discovery.EndpointSlice{ @@ -1268,6 +1446,11 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { *v4EpSlice, }, }, + &egressserviceapi.EgressServiceList{ + Items: []egressserviceapi.EgressService{ + esvc1, + }, + }, ) fakeOVN.InitAndRunEgressSVCController() @@ -1319,7 +1502,7 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { }, } v4EpSlice.ResourceVersion = "3" - v4EpSlice, err = fakeOVN.fakeClient.KubeClient.DiscoveryV1().EndpointSlices("testns").Update(context.TODO(), v4EpSlice, metav1.UpdateOptions{}) + _, err = fakeOVN.fakeClient.KubeClient.DiscoveryV1().EndpointSlices("testns").Update(context.TODO(), v4EpSlice, metav1.UpdateOptions{}) gomega.Expect(err).ToNot(gomega.HaveOccurred()) clusterRouter.Policies = []string{} @@ -1341,7 +1524,7 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { }) ginkgo.Context("on nodes changes", func() { - ginkgo.It("should create/update/delete logical router policies, labels and annotations", func() { + ginkgo.It("should create/update/delete logical router policies, labels and status", func() { app.Action = func(ctx *cli.Context) error { namespaceT := *newNamespace("testns") config.IPv6Mode = true @@ -1362,12 +1545,35 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { } ginkgo.By("creating two services with different selectors") - svc1 := svcFor("testns", "svc1", map[string]string{ - util.EgressSVCAnnotation: "{\"nodeSelector\":{\"matchLabels\":{\"home\": \"pineapple\"}}}", - }) - svc2 := svcFor("testns", "svc2", map[string]string{ - util.EgressSVCAnnotation: "{\"nodeSelector\":{\"matchLabels\":{\"home\": \"moai\"}}}", - }) + esvc1 := egressserviceapi.EgressService{ + ObjectMeta: metav1.ObjectMeta{ + Name: "svc1", + Namespace: "testns", + }, + Spec: egressserviceapi.EgressServiceSpec{ + NodeSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "home": "pineapple", + }, + }, + }, + } + svc1 := lbSvcFor("testns", 
"svc1") + + esvc2 := egressserviceapi.EgressService{ + ObjectMeta: metav1.ObjectMeta{ + Name: "svc2", + Namespace: "testns", + }, + Spec: egressserviceapi.EgressServiceSpec{ + NodeSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "home": "moai", + }, + }, + }, + } + svc2 := lbSvcFor("testns", "svc2") svc1V4EpSlice := discovery.EndpointSlice{ ObjectMeta: metav1.ObjectMeta{ @@ -1459,23 +1665,28 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { svc2V6EpSlice, }, }, + &egressserviceapi.EgressServiceList{ + Items: []egressserviceapi.EgressService{ + esvc1, + esvc2, + }, + }, ) fakeOVN.InitAndRunEgressSVCController() gomega.Eventually(func() error { - svc, err := fakeOVN.fakeClient.KubeClient.CoreV1().Services("testns").Get(context.TODO(), svc1.Name, metav1.GetOptions{}) + es, err := fakeOVN.fakeClient.EgressServiceClient.K8sV1().EgressServices("testns").Get(context.TODO(), esvc1.Name, metav1.GetOptions{}) if err != nil { return err } - val := svc.Annotations[util.EgressSVCHostAnnotation] - if val != node1Name { - return fmt.Errorf("expected svc1's host annotation value %s to be node1", val) + if es.Status.Host != node1.Name { + return fmt.Errorf("expected svc1's host value %s to be node1", es.Status.Host) } node1ExpectedLabels := map[string]string{ "home": "pineapple", - fmt.Sprintf("%s/testns-svc1", util.EgressSVCLabelPrefix): "", + fmt.Sprintf("%s/testns-svc1", egressSVCLabelPrefix): "", } node1, err = fakeOVN.fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), node1Name, metav1.GetOptions{}) @@ -1487,17 +1698,13 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { return fmt.Errorf("expected node1's labels %v to be equal %v", node1.Labels, node1ExpectedLabels) } - return nil - }).ShouldNot(gomega.HaveOccurred()) - gomega.Eventually(func() error { - svc, err := fakeOVN.fakeClient.KubeClient.CoreV1().Services("testns").Get(context.TODO(), svc2.Name, metav1.GetOptions{}) + es, err = fakeOVN.fakeClient.EgressServiceClient.K8sV1().EgressServices("testns").Get(context.TODO(), esvc2.Name, metav1.GetOptions{}) if err != nil { return err } - val, ok := svc.Annotations[util.EgressSVCHostAnnotation] - if ok { - return fmt.Errorf("expected svc2's egress host annotation to be empty, got: %s", val) + if es.Status.Host != "" { + return fmt.Errorf("expected svc2's host value %s to be empty", es.Status.Host) } node2ExpectedLabels := map[string]string{ @@ -1549,14 +1756,13 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { gomega.Expect(err).ToNot(gomega.HaveOccurred()) gomega.Eventually(func() error { - svc, err := fakeOVN.fakeClient.KubeClient.CoreV1().Services("testns").Get(context.TODO(), svc1.Name, metav1.GetOptions{}) + es, err := fakeOVN.fakeClient.EgressServiceClient.K8sV1().EgressServices("testns").Get(context.TODO(), esvc1.Name, metav1.GetOptions{}) if err != nil { return err } - val, ok := svc.Annotations[util.EgressSVCHostAnnotation] - if ok { - return fmt.Errorf("expected svc1's egress host annotation to be empty, got: %s", val) + if es.Status.Host != "" { + return fmt.Errorf("expected svc1's host value %s to be empty", es.Status.Host) } node1ExpectedLabels := map[string]string{ @@ -1572,23 +1778,18 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { return fmt.Errorf("expected node1's labels %v to be equal %v", node1.Labels, node1ExpectedLabels) } - return nil - }).ShouldNot(gomega.HaveOccurred()) - - gomega.Eventually(func() error { - svc, err := 
fakeOVN.fakeClient.KubeClient.CoreV1().Services("testns").Get(context.TODO(), svc2.Name, metav1.GetOptions{}) + es, err = fakeOVN.fakeClient.EgressServiceClient.K8sV1().EgressServices("testns").Get(context.TODO(), esvc2.Name, metav1.GetOptions{}) if err != nil { return err } - val := svc.Annotations[util.EgressSVCHostAnnotation] - if val != node2Name { - return fmt.Errorf("expected svc2's host annotation value %s to be node2", val) + if es.Status.Host != node2.Name { + return fmt.Errorf("expected svc1's host value %s to be node2", es.Status.Host) } node2ExpectedLabels := map[string]string{ "home": "moai", - fmt.Sprintf("%s/testns-svc2", util.EgressSVCLabelPrefix): "", + fmt.Sprintf("%s/testns-svc2", egressSVCLabelPrefix): "", } node2, err = fakeOVN.fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), node2Name, metav1.GetOptions{}) @@ -1637,7 +1838,7 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { gomega.Expect(err).ToNot(gomega.HaveOccurred()) }) - ginkgo.It("should update logical router policies, labels and annotations on reachability failure", func() { + ginkgo.It("should update logical router policies, labels and status on reachability failure", func() { app.Action = func(ctx *cli.Context) error { namespaceT := *newNamespace("testns") config.IPv6Mode = true @@ -1655,9 +1856,20 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { } ginkgo.By("creating a service selecting the node") - svc1 := svcFor("testns", "svc1", map[string]string{ - util.EgressSVCAnnotation: "{\"nodeSelector\":{\"matchLabels\":{\"kubernetes.io/hostname\": \"node1\"}}}", - }) + esvc1 := egressserviceapi.EgressService{ + ObjectMeta: metav1.ObjectMeta{ + Name: "svc1", + Namespace: "testns", + }, + Spec: egressserviceapi.EgressServiceSpec{ + NodeSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "kubernetes.io/hostname": node1.Name, + }, + }, + }, + } + svc1 := lbSvcFor("testns", "svc1") svc1V4EpSlice := discovery.EndpointSlice{ ObjectMeta: metav1.ObjectMeta{ @@ -1713,29 +1925,34 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { svc1V6EpSlice, }, }, + &egressserviceapi.EgressServiceList{ + Items: []egressserviceapi.EgressService{ + esvc1, + }, + }, ) ginkgo.By("modifying the controller's IsReachable func to return false on the first call and true for the second") count := 0 - fakeOVN.controller.egressSvcController.IsReachable = func(nodeName string, _ []net.IP, _ healthcheck.EgressIPHealthClient) bool { - count++ - return count == 2 - } - fakeOVN.InitAndRunEgressSVCController() + fakeOVN.InitAndRunEgressSVCController(func(c *DefaultNetworkController) { + c.egressSvcController.IsReachable = func(nodeName string, _ []net.IP, _ healthcheck.EgressIPHealthClient) bool { + count++ + return count == 2 + } + }) gomega.Eventually(func() error { - svc, err := fakeOVN.fakeClient.KubeClient.CoreV1().Services("testns").Get(context.TODO(), svc1.Name, metav1.GetOptions{}) + es, err := fakeOVN.fakeClient.EgressServiceClient.K8sV1().EgressServices("testns").Get(context.TODO(), svc1.Name, metav1.GetOptions{}) if err != nil { return err } - val := svc.Annotations[util.EgressSVCHostAnnotation] - if val != node1Name { - return fmt.Errorf("expected svc1's host annotation value %s to be node1", val) + if es.Status.Host != node1.Name { + return fmt.Errorf("expected svc1's host value %s to be node1", es.Status.Host) } node1ExpectedLabels := map[string]string{ - "kubernetes.io/hostname": "node1", - fmt.Sprintf("%s/testns-svc1", util.EgressSVCLabelPrefix): "", + 
"kubernetes.io/hostname": "node1", + fmt.Sprintf("%s/testns-svc1", egressSVCLabelPrefix): "", } node1, err = fakeOVN.fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), node1Name, metav1.GetOptions{}) @@ -1768,14 +1985,13 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { ginkgo.By("calling the reachability check which will return that the node is unreachable it will be drained") fakeOVN.controller.egressSvcController.CheckNodesReachabilityIterate() gomega.Eventually(func() error { - svc, err := fakeOVN.fakeClient.KubeClient.CoreV1().Services("testns").Get(context.TODO(), svc1.Name, metav1.GetOptions{}) + es, err := fakeOVN.fakeClient.EgressServiceClient.K8sV1().EgressServices("testns").Get(context.TODO(), svc1.Name, metav1.GetOptions{}) if err != nil { return err } - val, ok := svc.Annotations[util.EgressSVCHostAnnotation] - if ok { - return fmt.Errorf("expected svc1's egress host annotation to be empty, got: %s", val) + if es.Status.Host != "" { + return fmt.Errorf("expected svc1's host value %s to be empty", es.Status.Host) } node1ExpectedLabels := map[string]string{ @@ -1804,19 +2020,18 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { ginkgo.By("calling the reachability check which will return that the node is reachable the service will be reallocated") fakeOVN.controller.egressSvcController.CheckNodesReachabilityIterate() gomega.Eventually(func() error { - svc, err := fakeOVN.fakeClient.KubeClient.CoreV1().Services("testns").Get(context.TODO(), svc1.Name, metav1.GetOptions{}) + es, err := fakeOVN.fakeClient.EgressServiceClient.K8sV1().EgressServices("testns").Get(context.TODO(), svc1.Name, metav1.GetOptions{}) if err != nil { return err } - val := svc.Annotations[util.EgressSVCHostAnnotation] - if val != node1Name { - return fmt.Errorf("expected svc1's host annotation value %s to be node1", val) + if es.Status.Host != node1.Name { + return fmt.Errorf("expected svc1's host value %s to be node1", es.Status.Host) } node1ExpectedLabels := map[string]string{ - "kubernetes.io/hostname": "node1", - fmt.Sprintf("%s/testns-svc1", util.EgressSVCLabelPrefix): "", + "kubernetes.io/hostname": "node1", + fmt.Sprintf("%s/testns-svc1", egressSVCLabelPrefix): "", } node1, err = fakeOVN.fakeClient.KubeClient.CoreV1().Nodes().Get(context.TODO(), node1Name, metav1.GetOptions{}) @@ -1852,8 +2067,14 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { }) -func (o *FakeOVN) InitAndRunEgressSVCController() { +func (o *FakeOVN) InitAndRunEgressSVCController(tweak ...func(*DefaultNetworkController)) { o.controller.svcFactory.Start(o.stopChan) + c, err := o.controller.InitEgressServiceController() + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + o.controller.egressSvcController = c + for _, t := range tweak { + t(o.controller) + } o.egressSVCWg.Add(1) go func() { defer o.egressSVCWg.Done() @@ -1930,12 +2151,11 @@ func getDefaultNoReroutePolicies(controllerName string) []*nbdb.LogicalRouterPol return allLRPS } -func svcFor(namespace, name string, annotations map[string]string) v1.Service { +func lbSvcFor(namespace, name string) v1.Service { return v1.Service{ ObjectMeta: metav1.ObjectMeta{ Name: name, Namespace: namespace, - Annotations: annotations, ResourceVersion: "1", }, Spec: v1.ServiceSpec{ diff --git a/go-controller/pkg/ovn/hybrid_test.go b/go-controller/pkg/ovn/hybrid_test.go index b9a63ce8b6..641e099893 100644 --- a/go-controller/pkg/ovn/hybrid_test.go +++ b/go-controller/pkg/ovn/hybrid_test.go @@ -29,6 +29,7 @@ import ( egressfirewallfake 
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1/apis/clientset/versioned/fake" egressipfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1/apis/clientset/versioned/fake" egressqosfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressqos/v1/apis/clientset/versioned/fake" + egressservicefake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/fake" ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" libovsdbtest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/libovsdb" @@ -302,11 +303,13 @@ var _ = ginkgo.Describe("Hybrid SDN Master Operations", func() { egressFirewallFakeClient := &egressfirewallfake.Clientset{} egressIPFakeClient := &egressipfake.Clientset{} egressQoSFakeClient := &egressqosfake.Clientset{} + egressServiceFakeClient := &egressservicefake.Clientset{} fakeClient := &util.OVNClientset{ KubeClient: kubeFakeClient, EgressIPClient: egressIPFakeClient, EgressFirewallClient: egressFirewallFakeClient, EgressQoSClient: egressQoSFakeClient, + EgressServiceClient: egressServiceFakeClient, } vlanID := 1024 @@ -588,11 +591,13 @@ var _ = ginkgo.Describe("Hybrid SDN Master Operations", func() { egressFirewallFakeClient := &egressfirewallfake.Clientset{} egressIPFakeClient := &egressipfake.Clientset{} egressQoSFakeClient := &egressqosfake.Clientset{} + egressServiceFakeClient := &egressservicefake.Clientset{} fakeClient := &util.OVNClientset{ KubeClient: kubeFakeClient, EgressIPClient: egressIPFakeClient, EgressFirewallClient: egressFirewallFakeClient, EgressQoSClient: egressQoSFakeClient, + EgressServiceClient: egressServiceFakeClient, } vlanID := 1024 @@ -800,11 +805,13 @@ var _ = ginkgo.Describe("Hybrid SDN Master Operations", func() { egressFirewallFakeClient := &egressfirewallfake.Clientset{} egressIPFakeClient := &egressipfake.Clientset{} egressQoSFakeClient := &egressqosfake.Clientset{} + egressServiceFakeClient := &egressservicefake.Clientset{} fakeClient := &util.OVNClientset{ KubeClient: kubeFakeClient, EgressIPClient: egressIPFakeClient, EgressFirewallClient: egressFirewallFakeClient, EgressQoSClient: egressQoSFakeClient, + EgressServiceClient: egressServiceFakeClient, } vlanID := 1024 @@ -1079,11 +1086,13 @@ var _ = ginkgo.Describe("Hybrid SDN Master Operations", func() { egressFirewallFakeClient := &egressfirewallfake.Clientset{} egressIPFakeClient := &egressipfake.Clientset{} egressQoSFakeClient := &egressqosfake.Clientset{} + egressServiceFakeClient := &egressservicefake.Clientset{} fakeClient := &util.OVNMasterClientset{ KubeClient: kubeFakeClient, EgressIPClient: egressIPFakeClient, EgressFirewallClient: egressFirewallFakeClient, EgressQoSClient: egressQoSFakeClient, + EgressServiceClient: egressServiceFakeClient, } vlanID := 1024 diff --git a/go-controller/pkg/ovn/master_test.go b/go-controller/pkg/ovn/master_test.go index 702124db2a..32fae892db 100644 --- a/go-controller/pkg/ovn/master_test.go +++ b/go-controller/pkg/ovn/master_test.go @@ -17,6 +17,7 @@ import ( egressfirewallfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1/apis/clientset/versioned/fake" egressipfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1/apis/clientset/versioned/fake" egressqosfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressqos/v1/apis/clientset/versioned/fake" + egressservicefake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/fake" 
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdbops" @@ -985,11 +986,13 @@ var _ = ginkgo.Describe("Default network controller operations", func() { egressFirewallFakeClient := &egressfirewallfake.Clientset{} egressIPFakeClient := &egressipfake.Clientset{} egressQoSFakeClient := &egressqosfake.Clientset{} + egressServiceFakeClient := &egressservicefake.Clientset{} fakeClient = &util.OVNMasterClientset{ KubeClient: kubeFakeClient, EgressIPClient: egressIPFakeClient, EgressFirewallClient: egressFirewallFakeClient, EgressQoSClient: egressQoSFakeClient, + EgressServiceClient: egressServiceFakeClient, } var err error diff --git a/go-controller/pkg/ovn/ovn.go b/go-controller/pkg/ovn/ovn.go index 46e8569731..7acde5b88a 100644 --- a/go-controller/pkg/ovn/ovn.go +++ b/go-controller/pkg/ovn/ovn.go @@ -455,9 +455,7 @@ func (oc *DefaultNetworkController) StartServiceController(wg *sync.WaitGroup, r return nil } -func newEgressServiceController(client clientset.Interface, nbClient libovsdbclient.Client, addressSetFactory addressset.AddressSetFactory, - svcFactory informers.SharedInformerFactory, stopCh <-chan struct{}, controllerName string) (*egresssvc.Controller, error) { - +func (oc *DefaultNetworkController) InitEgressServiceController() (*egresssvc.Controller, error) { // If the EgressIP controller is enabled it will take care of creating the // "no reroute" policies - we can pass "noop" functions to the egress service controller. initClusterEgressPolicies := func(libovsdbclient.Client, addressset.AddressSetFactory, string) error { return nil } @@ -491,10 +489,10 @@ func newEgressServiceController(client clientset.Interface, nbClient libovsdbcli return isReachableViaGRPC(mgmtIPs, healthClient, hcPort, timeout) } - return egresssvc.NewController(controllerName, client, nbClient, addressSetFactory, - initClusterEgressPolicies, createNodeNoReroutePolicies, - deleteNodeNoReroutePolicies, deleteLegacyDefaultNoRerouteNodePolicies, isReachable, - stopCh, svcFactory.Core().V1().Services(), - svcFactory.Discovery().V1().EndpointSlices(), - svcFactory.Core().V1().Nodes()) + return egresssvc.NewController(DefaultNetworkControllerName, oc.client, oc.nbClient, oc.addressSetFactory, + initClusterEgressPolicies, createNodeNoReroutePolicies, deleteNodeNoReroutePolicies, deleteLegacyDefaultNoRerouteNodePolicies, oc.kube.UpdateEgressServiceStatus, + isReachable, + oc.stopChan, oc.watchFactory.EgressServiceInformer(), oc.svcFactory.Core().V1().Services(), + oc.svcFactory.Discovery().V1().EndpointSlices(), + oc.svcFactory.Core().V1().Nodes()) } diff --git a/go-controller/pkg/ovn/ovn_test.go b/go-controller/pkg/ovn/ovn_test.go index 1f9a548f45..06b2a4c7ee 100644 --- a/go-controller/pkg/ovn/ovn_test.go +++ b/go-controller/pkg/ovn/ovn_test.go @@ -21,6 +21,8 @@ import ( egressipfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1/apis/clientset/versioned/fake" egressqos "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressqos/v1" egressqosfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressqos/v1/apis/clientset/versioned/fake" + egressservice "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1" + egressservicefake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/fake" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" 
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics" @@ -97,6 +99,7 @@ func (o *FakeOVN) start(objects ...runtime.Object) { egressFirewallObjects := []runtime.Object{} egressQoSObjects := []runtime.Object{} multiNetworkPolicyObjects := []runtime.Object{} + egressServiceObjects := []runtime.Object{} v1Objects := []runtime.Object{} nads := []*nettypes.NetworkAttachmentDefinition{} for _, object := range objects { @@ -112,6 +115,8 @@ func (o *FakeOVN) start(objects ...runtime.Object) { for i := range nadList.Items { nads = append(nads, &nadList.Items[i]) } + } else if _, isEgressServiceObject := object.(*egressservice.EgressServiceList); isEgressServiceObject { + egressServiceObjects = append(egressServiceObjects, object) } else { v1Objects = append(v1Objects, object) } @@ -122,6 +127,7 @@ func (o *FakeOVN) start(objects ...runtime.Object) { EgressFirewallClient: egressfirewallfake.NewSimpleClientset(egressFirewallObjects...), EgressQoSClient: egressqosfake.NewSimpleClientset(egressQoSObjects...), MultiNetworkPolicyClient: mnpfake.NewSimpleClientset(multiNetworkPolicyObjects...), + EgressServiceClient: egressservicefake.NewSimpleClientset(egressServiceObjects...), } o.init(nads) } @@ -220,6 +226,7 @@ func NewOvnController(ovnClient *util.OVNMasterClientset, wf *factory.WatchFacto EIPClient: ovnClient.EgressIPClient, EgressFirewallClient: ovnClient.EgressFirewallClient, CloudNetworkClient: ovnClient.CloudNetworkClient, + EgressServiceClient: ovnClient.EgressServiceClient, }, wf, recorder, diff --git a/go-controller/pkg/util/kube.go b/go-controller/pkg/util/kube.go index 668acbb2cb..f531ec11b0 100644 --- a/go-controller/pkg/util/kube.go +++ b/go-controller/pkg/util/kube.go @@ -34,6 +34,7 @@ import ( egressfirewallclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1/apis/clientset/versioned" egressipclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1/apis/clientset/versioned" egressqosclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressqos/v1/apis/clientset/versioned" + egressserviceclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" ) @@ -46,6 +47,7 @@ type OVNClientset struct { EgressQoSClient egressqosclientset.Interface NetworkAttchDefClient networkattchmentdefclientset.Interface MultiNetworkPolicyClient multinetworkpolicyclientset.Interface + EgressServiceClient egressserviceclientset.Interface } // OVNMasterClientset @@ -56,6 +58,7 @@ type OVNMasterClientset struct { CloudNetworkClient ocpcloudnetworkclientset.Interface EgressQoSClient egressqosclientset.Interface MultiNetworkPolicyClient multinetworkpolicyclientset.Interface + EgressServiceClient egressserviceclientset.Interface } type OVNNodeClientset struct { @@ -93,7 +96,8 @@ func (cs *OVNClientset) GetNodeClientset() *OVNNodeClientset { func (cs *OVNMasterClientset) GetNodeClientset() *OVNNodeClientset { return &OVNNodeClientset{ - KubeClient: cs.KubeClient, + KubeClient: cs.KubeClient, + EgressServiceClient: cs.EgressServiceClient, } } @@ -206,6 +210,11 @@ func NewOVNClientset(conf *config.KubernetesConfig) (*OVNClientset, error) { return nil, err } + egressserviceClientset, err := egressserviceclientset.NewForConfig(kconfig) + if err != nil { + return nil, err + } + return &OVNClientset{ KubeClient: kclientset, EgressIPClient: egressIPClientset, @@ -214,6 +223,7 @@ func 
NewOVNClientset(conf *config.KubernetesConfig) (*OVNClientset, error) { EgressQoSClient: egressqosClientset, NetworkAttchDefClient: networkAttchmntDefClientset, MultiNetworkPolicyClient: multiNetworkPolicyClientset, + EgressServiceClient: egressserviceClientset, }, nil } From 96fc896f0a4be128a2efcfedfa52c9983f3faec7 Mon Sep 17 00:00:00 2001 From: Ori Braunshtein Date: Tue, 17 Jan 2023 10:35:59 +0200 Subject: [PATCH 30/90] EgressService refactor: ovnkube-node side * Move iptables rules code to its own package * Have all egress service code in a separate controller * Add Network logic Signed-off-by: Ori Braunshtein --- .../egressservice/egressservice.go | 982 ++++++++++++++++++ .../egressservice/endpointslice.go | 86 ++ .../node/controllers/egressservice/service.go | 109 ++ .../node/default_node_network_controller.go | 18 +- go-controller/pkg/node/egress_service_test.go | 512 +++++++++ .../pkg/node/gateway_init_linux_test.go | 4 +- go-controller/pkg/node/gateway_iptables.go | 369 +++---- go-controller/pkg/node/gateway_localnet.go | 2 +- .../pkg/node/gateway_localnet_linux_test.go | 229 +--- go-controller/pkg/node/gateway_shared_intf.go | 115 +- .../pkg/node/gateway_shared_intf_linux.go | 114 +- go-controller/pkg/node/iptables/iptables.go | 82 ++ .../pkg/node/management-port_linux_test.go | 5 +- go-controller/pkg/node/ovn_test.go | 13 +- go-controller/pkg/util/kube.go | 7 +- 15 files changed, 1998 insertions(+), 649 deletions(-) create mode 100644 go-controller/pkg/node/controllers/egressservice/egressservice.go create mode 100644 go-controller/pkg/node/controllers/egressservice/endpointslice.go create mode 100644 go-controller/pkg/node/controllers/egressservice/service.go create mode 100644 go-controller/pkg/node/egress_service_test.go create mode 100644 go-controller/pkg/node/iptables/iptables.go diff --git a/go-controller/pkg/node/controllers/egressservice/egressservice.go b/go-controller/pkg/node/controllers/egressservice/egressservice.go new file mode 100644 index 0000000000..0a952bda27 --- /dev/null +++ b/go-controller/pkg/node/controllers/egressservice/egressservice.go @@ -0,0 +1,982 @@ +package egressservice + +import ( + "encoding/json" + "fmt" + "strconv" + "strings" + "sync" + "time" + + "github.com/coreos/go-iptables/iptables" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + egressserviceapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1" + egressserviceinformer "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/egressservice/v1" + egressservicelisters "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/listers/egressservice/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" + nodeipt "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/iptables" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/services" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + + corev1 "k8s.io/api/core/v1" + discoveryv1 "k8s.io/api/discovery/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/util/errors" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/apimachinery/pkg/util/wait" + corelisters "k8s.io/client-go/listers/core/v1" + discoverylisters "k8s.io/client-go/listers/discovery/v1" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/util/workqueue" + "k8s.io/klog/v2" + utilnet "k8s.io/utils/net" +) + 
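A word on the nodeipt import above: the diffstat adds go-controller/pkg/node/iptables/iptables.go, whose contents are not shown in this excerpt. Helpers of this kind typically wrap github.com/coreos/go-iptables so rules can be applied idempotently; the Rule type and EnsureRules function below are illustrative assumptions, not the actual nodeipt API:

// Illustrative sketch only; not the actual contents of pkg/node/iptables.
package iptables

import (
	"fmt"

	"github.com/coreos/go-iptables/iptables"
)

// Rule describes one iptables rule in a table/chain (illustrative shape only).
type Rule struct {
	Table string
	Chain string
	Args  []string
}

// EnsureRules appends each rule only if it is not already present, so repeated
// syncs and controller restarts do not duplicate rules.
func EnsureRules(proto iptables.Protocol, rules []Rule) error {
	ipt, err := iptables.NewWithProtocol(proto)
	if err != nil {
		return err
	}
	for _, r := range rules {
		if err := ipt.AppendUnique(r.Table, r.Chain, r.Args...); err != nil {
			return fmt.Errorf("failed to ensure %v in %s/%s: %v", r.Args, r.Table, r.Chain, err)
		}
	}
	return nil
}

AppendUnique is what makes repeated syncs safe: re-running it with an identical rule spec is a no-op, so neither restarts nor the periodic repair pass below duplicate rules.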
+const ( + Chain = "OVN-KUBE-EGRESS-SVC" // called from nat-POSTROUTING + IPRulePriority = 5000 // the priority of the ip rules created by the controller +) + +type Controller struct { + stopCh <-chan struct{} + sync.Mutex + // Packets coming with this mark should not be SNATed. + // In particular, this mark is set by an ovn lrp when the pod is local to the + // host node and tries to reach another node and in that case we don't want + // to snat its traffic, which matches the behavior of pods on different nodes + // when they try to reach a different node but hit the 102 "allow" lrp. + // See https://github.com/ovn-org/ovn-kubernetes/pull/3064 for more details. + returnMark string + thisNode string // name of the node we're running on + + egressServiceLister egressservicelisters.EgressServiceLister + egressServiceSynced cache.InformerSynced + egressServiceQueue workqueue.RateLimitingInterface + + serviceLister corelisters.ServiceLister + servicesSynced cache.InformerSynced + + endpointSliceLister discoverylisters.EndpointSliceLister + endpointSlicesSynced cache.InformerSynced + + services map[string]*svcState // svc key -> state +} + +type svcState struct { + v4LB string // IPv4 ingress of the service + v4Eps sets.Set[string] // v4 endpoints that have an SNAT rule configured + v6LB string // IPv6 ingress of the service + v6Eps sets.Set[string] // v6 endpoints that have an SNAT rule configured + net string // net corresponding to the spec.Network + netEps sets.Set[string] // All endpoints that have an ip rule configured + + stale bool +} + +func NewController(stopCh <-chan struct{}, returnMark, thisNode string, + esInformer egressserviceinformer.EgressServiceInformer, + serviceInformer cache.SharedIndexInformer, + endpointSliceInformer cache.SharedIndexInformer) (*Controller, error) { + klog.Info("Setting up event handlers for Egress Services") + + c := &Controller{ + stopCh: stopCh, + returnMark: returnMark, + thisNode: thisNode, + services: map[string]*svcState{}, + } + + c.egressServiceLister = esInformer.Lister() + c.egressServiceSynced = esInformer.Informer().HasSynced + c.egressServiceQueue = workqueue.NewNamedRateLimitingQueue( + workqueue.NewItemFastSlowRateLimiter(1*time.Second, 5*time.Second, 5), + "egressservices", + ) + _, err := esInformer.Informer().AddEventHandler(factory.WithUpdateHandlingForObjReplace(cache.ResourceEventHandlerFuncs{ + AddFunc: c.onEgressServiceAdd, + UpdateFunc: c.onEgressServiceUpdate, + DeleteFunc: c.onEgressServiceDelete, + })) + if err != nil { + return nil, err + } + + c.serviceLister = corelisters.NewServiceLister(serviceInformer.GetIndexer()) + c.servicesSynced = serviceInformer.HasSynced + _, err = serviceInformer.AddEventHandler(factory.WithUpdateHandlingForObjReplace(cache.ResourceEventHandlerFuncs{ + AddFunc: c.onServiceAdd, + UpdateFunc: c.onServiceUpdate, + DeleteFunc: c.onServiceDelete, + })) + if err != nil { + return nil, err + } + + c.endpointSliceLister = discoverylisters.NewEndpointSliceLister(endpointSliceInformer.GetIndexer()) + c.endpointSlicesSynced = endpointSliceInformer.HasSynced + _, err = endpointSliceInformer.AddEventHandler(factory.WithUpdateHandlingForObjReplace(cache.ResourceEventHandlerFuncs{ + AddFunc: c.onEndpointSliceAdd, + UpdateFunc: c.onEndpointSliceUpdate, + DeleteFunc: c.onEndpointSliceDelete, + })) + if err != nil { + return nil, err + } + + return c, nil +} + +// onEgressServiceAdd queues the EgressService for processing. 
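The egressServiceQueue built above uses a fast/slow item rate limiter: the first few requeues of a failing key are retried quickly, after which retries back off to the slow interval. A small runnable illustration with the same parameters (the key is a placeholder):

package main

import (
	"fmt"
	"time"

	"k8s.io/client-go/util/workqueue"
)

func main() {
	// Same parameters as the egress service queue: the first 5 failures of an
	// item are retried after 1s each, every failure after that waits 5s.
	rl := workqueue.NewItemFastSlowRateLimiter(1*time.Second, 5*time.Second, 5)
	for i := 1; i <= 7; i++ {
		fmt.Printf("requeue %d of default/svc1 -> delay %v\n", i, rl.When("default/svc1"))
	}
	// Forget resets the failure count once the key finally syncs cleanly.
	rl.Forget("default/svc1")
}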
+func (c *Controller) onEgressServiceAdd(obj interface{}) { + key, err := cache.MetaNamespaceKeyFunc(obj) + if err != nil { + utilruntime.HandleError(fmt.Errorf("couldn't get key for object %+v: %v", obj, err)) + return + } + c.egressServiceQueue.Add(key) +} + +// onEgressServiceUpdate queues the EgressService for processing. +func (c *Controller) onEgressServiceUpdate(oldObj, newObj interface{}) { + oldEQ := oldObj.(*egressserviceapi.EgressService) + newEQ := newObj.(*egressserviceapi.EgressService) + + if oldEQ.ResourceVersion == newEQ.ResourceVersion || + !newEQ.GetDeletionTimestamp().IsZero() { + return + } + + key, err := cache.MetaNamespaceKeyFunc(newObj) + if err == nil { + c.egressServiceQueue.Add(key) + } +} + +// onEgressServiceDelete queues the EgressService for processing. +func (c *Controller) onEgressServiceDelete(obj interface{}) { + key, err := cache.MetaNamespaceKeyFunc(obj) + if err != nil { + utilruntime.HandleError(fmt.Errorf("couldn't get key for object %+v: %v", obj, err)) + return + } + c.egressServiceQueue.Add(key) +} + +func (c *Controller) Run(threadiness int) { + defer utilruntime.HandleCrash() + + klog.Infof("Starting Egress Services Controller") + + if !cache.WaitForNamedCacheSync("egressservices", c.stopCh, c.egressServiceSynced) { + utilruntime.HandleError(fmt.Errorf("timed out waiting for caches to sync")) + klog.Infof("Synchronization failed") + return + } + + if !cache.WaitForNamedCacheSync("egressservices_services", c.stopCh, c.servicesSynced) { + utilruntime.HandleError(fmt.Errorf("timed out waiting for caches to sync")) + klog.Infof("Synchronization failed") + return + } + + if !cache.WaitForNamedCacheSync("egressservices_endpointslices", c.stopCh, c.endpointSlicesSynced) { + utilruntime.HandleError(fmt.Errorf("timed out waiting for caches to sync")) + klog.Infof("Synchronization failed") + return + } + + klog.Infof("Repairing Egress Services") + err := c.repair() + if err != nil { + klog.Errorf("Failed to repair Egress Services entries: %v", err) + } + + wg := &sync.WaitGroup{} + for i := 0; i < threadiness; i++ { + wg.Add(1) + go func() { + defer wg.Done() + wait.Until(func() { + c.runEgressServiceWorker(wg) + }, time.Second, c.stopCh) + }() + } + + // wait until we're told to stop + <-c.stopCh + + klog.Infof("Shutting down Egress Services controller") + c.egressServiceQueue.ShutDown() + + wg.Wait() +} + +// Removes stale iptables/ip rules, updates the controller cache with the correct existing ones. 
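Run launches threadiness copies of runEgressServiceWorker, whose body falls outside this hunk. Workers of this kind conventionally drain the rate-limited queue as in the sketch below; the controller type, queue wiring and sync stand-in here are assumptions, not this controller's literal code:

package main

import (
	"fmt"

	"k8s.io/client-go/util/workqueue"
)

type controller struct {
	queue workqueue.RateLimitingInterface
}

// sync is a stand-in for the real per-key reconcile logic.
func (c *controller) sync(key string) error {
	fmt.Println("reconciling", key)
	return nil
}

// runWorker drains the queue until it is shut down, requeueing failed keys
// through the rate limiter and forgetting them once they sync cleanly.
func (c *controller) runWorker() {
	for {
		key, quit := c.queue.Get()
		if quit {
			return
		}
		func() {
			defer c.queue.Done(key)
			if err := c.sync(key.(string)); err != nil {
				c.queue.AddRateLimited(key)
				return
			}
			c.queue.Forget(key)
		}()
	}
}

func main() {
	c := &controller{queue: workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter())}
	c.queue.Add("default/svc1")
	c.queue.ShutDown()
	c.runWorker()
}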
+func (c *Controller) repair() error {
+ c.Lock()
+ defer c.Unlock()
+
+ // all the current endpoints mapped to valid egress service keys
+ v4EndpointsToSvcKey := map[string]string{}
+ v6EndpointsToSvcKey := map[string]string{}
+
+ // all the current cluster ips mapped to valid egress service keys
+ cipsToSvcKey := map[string]string{}
+
+ services, err := c.serviceLister.List(labels.Everything())
+ if err != nil {
+ return err
+ }
+ allServices := map[string]*corev1.Service{}
+ for _, s := range services {
+ key, err := cache.MetaNamespaceKeyFunc(s)
+ if err != nil {
+ klog.Errorf("Failed to read Service key: %v", err)
+ continue
+ }
+ allServices[key] = s
+ }
+
+ egressServices, err := c.egressServiceLister.List(labels.Everything())
+ if err != nil {
+ return err
+ }
+ for _, es := range egressServices {
+ key, err := cache.MetaNamespaceKeyFunc(es)
+ if err != nil {
+ klog.Errorf("Failed to read EgressService key: %v", err)
+ continue
+ }
+ svc := allServices[key]
+
+ if svc == nil {
+ continue
+ }
+
+ if !c.shouldConfigureEgressSVC(svc, es.Status.Host) {
+ continue
+ }
+
+ v4, v6, err := c.allEndpointsFor(svc)
+ if err != nil {
+ klog.Errorf("Failed to fetch endpoints: %v", err)
+ continue
+ }
+
+ for _, ep := range v4.UnsortedList() {
+ v4EndpointsToSvcKey[ep] = key
+ }
+
+ for _, ep := range v6.UnsortedList() {
+ v6EndpointsToSvcKey[ep] = key
+ }
+
+ v4LB, v6LB := "", ""
+ for _, ip := range svc.Status.LoadBalancer.Ingress {
+ if utilnet.IsIPv4String(ip.IP) {
+ v4LB = ip.IP
+ continue
+ }
+ v6LB = ip.IP
+ }
+
+ for _, cip := range util.GetClusterIPs(svc) {
+ cipsToSvcKey[cip] = key
+ }
+
+ c.services[key] = &svcState{
+ v4LB: v4LB,
+ v4Eps: sets.New[string](),
+ v6LB: v6LB,
+ v6Eps: sets.New[string](),
+ net: es.Spec.Network,
+ netEps: sets.New[string](),
+ stale: false,
+ }
+ }
+
+ errorList := []error{}
+ err = c.repairIPRules(v4EndpointsToSvcKey, v6EndpointsToSvcKey, cipsToSvcKey)
+ if err != nil {
+ errorList = append(errorList, err)
+ }
+
+ err = c.repairIPTables(v4EndpointsToSvcKey, v6EndpointsToSvcKey)
+ if err != nil {
+ errorList = append(errorList, err)
+ }
+
+ return errors.NewAggregate(errorList)
+}
+
+// Removes stale ip rules and updates the caches with the valid existing ones.
+// Valid ip rules in this context are those that belong to an existing EgressService,
+// whose src points to an existing endpoint or clusterIP of the service, and whose
+// routing table matches the Network field of the service.
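+// For reference, a rule owned by this controller renders in `ip rule` output
+// roughly as:
+// 5000: from 10.128.0.3 lookup mynetwork
+// i.e. priority IPRulePriority, src an endpoint (or clusterIP) of the service,
+// and the routing table matching the EgressService's Network.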
+func (c *Controller) repairIPRules(v4EpsToServices, v6EpsToServices, cipsToServices map[string]string) error {
+ type IPRule struct {
+ Priority int32 `json:"priority"`
+ Src string `json:"src"`
+ Table string `json:"table"`
+ }
+
+ repairRules := func(family string) error {
+ epsToSvcKey := v4EpsToServices
+ if family == "-6" {
+ epsToSvcKey = v6EpsToServices
+ }
+
+ allIPRules := []IPRule{}
+ ipRulesToDelete := []IPRule{}
+
+ stdout, stderr, err := util.RunIP(family, "--json", "rule", "show")
+ if err != nil {
+ return fmt.Errorf("could not list %s rules - stdout: %s, stderr: %s, err: %v", family, stdout, stderr, err)
+ }
+
+ err = json.Unmarshal([]byte(stdout), &allIPRules)
+ if err != nil {
+ return err
+ }
+
+ for _, rule := range allIPRules {
+ if rule.Priority != IPRulePriority {
+ // the priority isn't the fixed one used by the controller
+ continue
+ }
+
+ svcKey, found := epsToSvcKey[rule.Src]
+ if !found {
+ svcKey, found = cipsToServices[rule.Src]
+ if !found {
+ // no service endpoint or clusterIP matches this src
+ ipRulesToDelete = append(ipRulesToDelete, rule)
+ continue
+ }
+ }
+
+ state := c.services[svcKey]
+ if state == nil {
+ // the rule belongs to a service that is no longer valid
+ ipRulesToDelete = append(ipRulesToDelete, rule)
+ continue
+ }
+
+ if state.net != rule.Table {
+ // the rule points to the wrong routing table
+ ipRulesToDelete = append(ipRulesToDelete, rule)
+ continue
+ }
+
+ // the rule is valid, we update the service's cache to not reconfigure it later.
+ state.netEps.Insert(rule.Src)
+ }
+
+ errorList := []error{}
+ for _, rule := range ipRulesToDelete {
+ err := deleteIPRule(family, rule.Priority, rule.Src, rule.Table)
+ if err != nil {
+ errorList = append(errorList, err)
+ }
+ }
+
+ return errors.NewAggregate(errorList)
+ }
+
+ errorList := []error{}
+ if config.IPv4Mode {
+ err := repairRules("-4")
+ if err != nil {
+ errorList = append(errorList, err)
+ }
+ }
+
+ if config.IPv6Mode {
+ err := repairRules("-6")
+ if err != nil {
+ errorList = append(errorList, err)
+ }
+ }
+
+ return errors.NewAggregate(errorList)
+}
+
+// Removes stale iptables rules and updates the caches with the valid existing ones.
+// In addition, verifies that the first rule in the Chain matches the "returnMark":
+// packets coming with that mark should not be evaluated for SNATing, so we make sure
+// the first rule in the Chain is a RETURN one for this mark.
+// Valid iptables rules in this context are those that belong to an existing EgressService,
+// whose source points to an existing ep of the service, and whose SNAT matches the service's LB.
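+// For reference, the rules managed in the chain look like:
+// -m mark --mark 0x3f0 -m comment --comment DoNotSNAT -j RETURN
+// -s 10.128.0.3 -m comment --comment namespace1/service1 -j SNAT --to-source 5.5.5.5
+// where the comment holds the service's namespace/name key and the SNAT target
+// is its LB ingress IP (the mark value shown is just an example, the actual
+// value comes from "returnMark").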
+func (c *Controller) repairIPTables(v4EpsToServices, v6EpsToServices map[string]string) error { + type esvcIPTRule struct { + svcKey string + ep string + lb string + mark int32 + } + + parseIPTRule := func(rule string) (*esvcIPTRule, error) { + split := strings.Fields(rule) + if len(split)%2 != 0 { + return nil, fmt.Errorf("expected rule %s to have a key-value format", rule) + } + + args := map[string]string{} + for i := 0; i < len(split); i += 2 { + args[split[i]] = split[i+1] + } + + svcKey := args["--comment"] + ep := args["-s"] + lb := args["--to-source"] + + strMark := args["--mark"] + mark := int32(0) + if strMark != "" { + parsedFWMark, err := strconv.ParseInt(strMark, 0, 32) + if err != nil { + return nil, fmt.Errorf("could not parse fwmark for rule %s, err: %w", rule, err) + } + mark = int32(parsedFWMark) + } + + return &esvcIPTRule{svcKey: svcKey, ep: ep, lb: lb, mark: mark}, nil + } + + snatRepair := func(proto iptables.Protocol, epsToSvcs map[string]string) error { + defaultFirstRule := c.defaultReturnRule(proto) // fetch the rule that should be first in the chain + + ipt, err := util.GetIPTablesHelper(proto) + if err != nil { + return err + } + + snatRules, err := ipt.List("nat", Chain) + if err != nil { + return err + } + + // we verify that the first rule is the "do not snat" one + if len(snatRules) == 0 { + return nodeipt.AddRules([]nodeipt.Rule{defaultFirstRule}, true) + } + + parsedFWMark, err := strconv.ParseInt(c.returnMark, 0, 0) + if err != nil { + return fmt.Errorf("could not parse %s as default return mark for egress services, err: %w", c.returnMark, err) + } + mark := int32(parsedFWMark) + + firstRule := snatRules[0] + parsed, err := parseIPTRule(firstRule) + doNotSNATAdded := false + if err != nil || parsed.mark != mark { + // The first rule is either malformed (err != nil) or it does not match + // the correct mark. In either case, we should add the correct rule anyways + // to be sure it is the first one. + err := nodeipt.AddRules([]nodeipt.Rule{defaultFirstRule}, false) + if err != nil { + return err + } + doNotSNATAdded = true + } + if !doNotSNATAdded { + // the do not snat rule was already present at the first position + snatRules = snatRules[1:] + } + + // now we run over the existing rules to determine which should be deleted + // and update the cache accordingly + rulesToDel := []string{} + for _, rule := range snatRules { + parsed, err := parseIPTRule(rule) + if err != nil { + // the rule is malformed + rulesToDel = append(rulesToDel, rule) + continue + } + + svcKey := epsToSvcs[parsed.ep] + if svcKey != parsed.svcKey { + // the rule matches the wrong service + rulesToDel = append(rulesToDel, rule) + continue + } + + svcState, found := c.services[svcKey] + if !found { + // the rule matches a service that is no longer valid + rulesToDel = append(rulesToDel, rule) + continue + } + + lbToCompare := svcState.v4LB + epsToAdd := svcState.v4Eps + if proto == iptables.ProtocolIPv6 { + lbToCompare = svcState.v6LB + epsToAdd = svcState.v6Eps + } + + if lbToCompare != parsed.lb { + // the rule SNATs to the wrong IP + rulesToDel = append(rulesToDel, rule) + continue + } + + // the rule is valid, we update the service's cache to not reconfigure it later. + epsToAdd.Insert(parsed.ep) + } + + errorList := []error{} + for _, rule := range rulesToDel { + args := strings.Fields(rule) + err := ipt.Delete("nat", Chain, args...) 
+ if err != nil {
+ errorList = append(errorList, err)
+ }
+ }
+
+ return errors.NewAggregate(errorList)
+ }
+
+ errorList := []error{}
+ if config.IPv4Mode {
+ ipt, err := util.GetIPTablesHelper(iptables.ProtocolIPv4)
+ if err != nil {
+ errorList = append(errorList, err)
+ }
+
+ err = ipt.NewChain("nat", Chain)
+ if err != nil {
+ klog.V(5).Infof("Could not create egress service nat chain: %v", err)
+ }
+
+ err = snatRepair(iptables.ProtocolIPv4, v4EpsToServices)
+ if err != nil {
+ errorList = append(errorList, err)
+ }
+ }
+
+ if config.IPv6Mode {
+ ipt, err := util.GetIPTablesHelper(iptables.ProtocolIPv6)
+ if err != nil {
+ errorList = append(errorList, err)
+ }
+
+ err = ipt.NewChain("nat", Chain)
+ if err != nil {
+ klog.V(5).Infof("Could not create egress service nat chain: %v", err)
+ }
+
+ err = snatRepair(iptables.ProtocolIPv6, v6EpsToServices)
+ if err != nil {
+ errorList = append(errorList, err)
+ }
+ }
+
+ return errors.NewAggregate(errorList)
+}
+
+func (c *Controller) runEgressServiceWorker(wg *sync.WaitGroup) {
+ for c.processNextEgressServiceWorkItem(wg) {
+ }
+}
+
+func (c *Controller) processNextEgressServiceWorkItem(wg *sync.WaitGroup) bool {
+ wg.Add(1)
+ defer wg.Done()
+
+ key, quit := c.egressServiceQueue.Get()
+ if quit {
+ return false
+ }
+
+ defer c.egressServiceQueue.Done(key)
+
+ err := c.syncEgressService(key.(string))
+ if err == nil {
+ c.egressServiceQueue.Forget(key)
+ return true
+ }
+
+ utilruntime.HandleError(fmt.Errorf("%v failed with: %v", key, err))
+
+ if c.egressServiceQueue.NumRequeues(key) < 10 {
+ c.egressServiceQueue.AddRateLimited(key)
+ return true
+ }
+
+ c.egressServiceQueue.Forget(key)
+ return true
+}
+
+func (c *Controller) syncEgressService(key string) error {
+ c.Lock()
+ defer c.Unlock()
+
+ startTime := time.Now()
+ namespace, name, err := cache.SplitMetaNamespaceKey(key)
+ if err != nil {
+ return err
+ }
+ klog.Infof("Processing sync for EgressService %s/%s", namespace, name)
+
+ defer func() {
+ klog.V(4).Infof("Finished syncing EgressService %s on namespace %s: %v", name, namespace, time.Since(startTime))
+ }()
+
+ es, err := c.egressServiceLister.EgressServices(namespace).Get(name)
+ if err != nil && !apierrors.IsNotFound(err) {
+ return err
+ }
+
+ svc, err := c.serviceLister.Services(namespace).Get(name)
+ if err != nil && !apierrors.IsNotFound(err) {
+ return err
+ }
+
+ cachedState := c.services[key]
+ if svc == nil && cachedState == nil {
+ return nil
+ }
+
+ if svc == nil && cachedState != nil {
+ return c.clearServiceRulesAndRequeue(key, cachedState)
+ }
+
+ if es == nil && cachedState == nil {
+ return nil
+ }
+
+ if es == nil && cachedState != nil {
+ return c.clearServiceRulesAndRequeue(key, cachedState)
+ }
+
+ if cachedState != nil && cachedState.stale {
+ // The service is marked stale because something failed when trying to delete it.
+ // We try to delete it again before doing anything else.
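+ // clearServiceRulesAndRequeue removes all of its rules, deletes it from
+ // the cache and requeues the key so it can be re-synced from scratch.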
+ return c.clearServiceRulesAndRequeue(key, cachedState) + } + + // At this point both the svc and es are not nil + shouldConfigure := c.shouldConfigureEgressSVC(svc, es.Status.Host) + if cachedState == nil && !shouldConfigure { + return nil + } + + if cachedState != nil && !shouldConfigure { + return c.clearServiceRulesAndRequeue(key, cachedState) + } + + lbsChanged := false + v4LB, v6LB := "", "" + for _, ip := range svc.Status.LoadBalancer.Ingress { + if utilnet.IsIPv4String(ip.IP) { + v4LB = ip.IP + continue + } + v6LB = ip.IP + } + + if cachedState != nil { + lbsChanged = v4LB != cachedState.v4LB || v6LB != cachedState.v6LB + } + + if lbsChanged { + err := c.clearServiceSNATRules(key, cachedState) + if err != nil { + return err + } + } + + if cachedState == nil { + cachedState = &svcState{ + v4Eps: sets.New[string](), + v6Eps: sets.New[string](), + netEps: sets.New[string](), + stale: false, + } + c.services[key] = cachedState + } + cachedState.v4LB = v4LB + cachedState.v6LB = v6LB + + v4Eps, v6Eps, err := c.allEndpointsFor(svc) + if err != nil { + return err + } + + v4ToAdd := v4Eps.Difference(cachedState.v4Eps) + v6ToAdd := v6Eps.Difference(cachedState.v6Eps) + v4ToDelete := cachedState.v4Eps.Difference(v4Eps) + v6ToDelete := cachedState.v6Eps.Difference(v6Eps) + + if cachedState.v4LB != "" { + for ep := range v4ToAdd { + err := nodeipt.AddRules([]nodeipt.Rule{snatIPTRuleFor(key, cachedState.v4LB, ep)}, true) + if err != nil { + return err + } + cachedState.v4Eps.Insert(ep) + } + + for ep := range v4ToDelete { + err := nodeipt.DelRules([]nodeipt.Rule{snatIPTRuleFor(key, cachedState.v4LB, ep)}) + if err != nil { + return err + } + + cachedState.v4Eps.Delete(ep) + } + } + + if cachedState.v6LB != "" { + for ep := range v6ToAdd { + err := nodeipt.AddRules([]nodeipt.Rule{snatIPTRuleFor(key, cachedState.v6LB, ep)}, true) + if err != nil { + return err + } + + cachedState.v6Eps.Insert(ep) + } + + for ep := range v6ToDelete { + err := nodeipt.DelRules([]nodeipt.Rule{snatIPTRuleFor(key, cachedState.v6LB, ep)}) + if err != nil { + return err + } + + cachedState.v6Eps.Delete(ep) + } + } + + // At this point we finished handling the SNAT rules + // Now we create the relevant ip rules according to the object's "Network" + + if es.Spec.Network != cachedState.net { + err := c.clearServiceIPRules(cachedState) + if err != nil { + return err + } + } + cachedState.net = es.Spec.Network + + if cachedState.net == "" { + return nil + } + + allEps := v4Eps.Union(v6Eps) + + for _, cip := range util.GetClusterIPs(svc) { + allEps.Insert(cip) + } + + ipRulesToAdd := allEps.Difference(cachedState.netEps) + ipRulesToDelete := cachedState.netEps.Difference(allEps) + + for ip := range ipRulesToAdd { + family := "-4" + if utilnet.IsIPv6String(ip) { + family = "-6" + } + + err := createIPRule(family, IPRulePriority, ip, cachedState.net) + if err != nil { + return err + } + + cachedState.netEps.Insert(ip) + } + + for ip := range ipRulesToDelete { + family := "-4" + if utilnet.IsIPv6String(ip) { + family = "-6" + } + + err := deleteIPRule(family, IPRulePriority, ip, cachedState.net) + if err != nil { + return err + } + + cachedState.netEps.Delete(ip) + } + + return nil +} + +// Returns all of the non-host endpoints for the given service grouped by IPv4/IPv6. 
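+// FQDN slices are skipped and host-networked addresses are filtered out with
+// services.IsHostEndpoint, as host-networked endpoints should not be SNATed.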
+func (c *Controller) allEndpointsFor(svc *corev1.Service) (sets.Set[string], sets.Set[string], error) {
+ // Get the endpoint slices associated with the Service
+ esLabelSelector := labels.Set(map[string]string{
+ discoveryv1.LabelServiceName: svc.Name,
+ }).AsSelectorPreValidated()
+
+ endpointSlices, err := c.endpointSliceLister.EndpointSlices(svc.Namespace).List(esLabelSelector)
+ if err != nil {
+ return nil, nil, err
+ }
+
+ v4Endpoints := sets.New[string]()
+ v6Endpoints := sets.New[string]()
+
+ for _, eps := range endpointSlices {
+ if eps.AddressType == discoveryv1.AddressTypeFQDN {
+ continue
+ }
+
+ epsToInsert := v4Endpoints
+ if eps.AddressType == discoveryv1.AddressTypeIPv6 {
+ epsToInsert = v6Endpoints
+ }
+
+ for _, ep := range eps.Endpoints {
+ for _, ip := range ep.Addresses {
+ ipStr := utilnet.ParseIPSloppy(ip).String()
+ if !services.IsHostEndpoint(ipStr) {
+ epsToInsert.Insert(ipStr)
+ }
+ }
+ }
+ }
+
+ return v4Endpoints, v6Endpoints, nil
+}
+
+// Clears all of the SNAT rules of the service.
+func (c *Controller) clearServiceSNATRules(key string, state *svcState) error {
+ for ip := range state.v4Eps {
+ err := nodeipt.DelRules([]nodeipt.Rule{snatIPTRuleFor(key, state.v4LB, ip)})
+ if err != nil {
+ return err
+ }
+
+ state.v4Eps.Delete(ip)
+ }
+ state.v4LB = ""
+
+ for ip := range state.v6Eps {
+ err := nodeipt.DelRules([]nodeipt.Rule{snatIPTRuleFor(key, state.v6LB, ip)})
+ if err != nil {
+ return err
+ }
+
+ state.v6Eps.Delete(ip)
+ }
+ state.v6LB = ""
+
+ return nil
+}
+
+// Clears all of the ip rules of the service.
+func (c *Controller) clearServiceIPRules(state *svcState) error {
+ errorList := []error{}
+ for ip := range state.netEps {
+ family := "-4"
+ if utilnet.IsIPv6String(ip) {
+ family = "-6"
+ }
+
+ err := deleteIPRule(family, IPRulePriority, ip, state.net)
+ if err != nil {
+ errorList = append(errorList, err)
+ continue
+ }
+
+ state.netEps.Delete(ip)
+ }
+
+ return errors.NewAggregate(errorList)
+}
+
+// Clears all of the iptables and ip rules that relate to the service,
+// removes it from the cache and requeues it to be processed again.
+func (c *Controller) clearServiceRulesAndRequeue(key string, state *svcState) error {
+ state.stale = true
+
+ err := c.clearServiceSNATRules(key, state)
+ if err != nil {
+ return err
+ }
+
+ err = c.clearServiceIPRules(state)
+ if err != nil {
+ return err
+ }
+
+ delete(c.services, key)
+ c.egressServiceQueue.Add(key)
+
+ return nil
+}
+
+// Returns true if the controller should configure the given service as an "Egress Service".
+func (c *Controller) shouldConfigureEgressSVC(svc *corev1.Service, svcHost string) bool {
+ return svcHost == c.thisNode &&
+ svc.Spec.Type == corev1.ServiceTypeLoadBalancer &&
+ len(svc.Status.LoadBalancer.Ingress) > 0
+}
+
+// Creates an ip rule with the given fields.
+func createIPRule(family string, priority int32, src, table string) error {
+ prio := fmt.Sprintf("%d", priority)
+ stdout, stderr, err := util.RunIP(family, "rule", "add", "prio", prio, "from", src, "table", table)
+ if err != nil && !strings.Contains(stderr, "File exists") {
+ return fmt.Errorf("could not add rule for src %s table %s - stdout: %s, stderr: %s, err: %v", src, table, stdout, stderr, err)
+ }
+
+ return nil
+}
+
+// Deletes an ip rule with the given fields.
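+// For reference, it runs a command of the form:
+// ip -4 rule del prio 5000 from 10.128.0.3 table mynetwork
+// treating "No such file or directory" (rule already gone) as success.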
+func deleteIPRule(family string, priority int32, src, table string) error {
+ prio := fmt.Sprintf("%d", priority)
+ stdout, stderr, err := util.RunIP(family, "rule", "del", "prio", prio, "from", src, "table", table)
+ if err != nil && !strings.Contains(stderr, "No such file or directory") {
+ return fmt.Errorf("could not delete rule for src %s table %s - stdout: %s, stderr: %s, err: %v", src, table, stdout, stderr, err)
+ }
+
+ return nil
+}
+
+// Returns the SNAT rule that should be created for the given lb/endpoint.
+func snatIPTRuleFor(comment string, lb, ip string) nodeipt.Rule {
+ return nodeipt.Rule{
+ Table: "nat",
+ Chain: Chain,
+ Args: []string{
+ "-s", ip,
+ "-m", "comment", "--comment", comment,
+ "-j", "SNAT",
+ "--to-source", lb,
+ },
+ Protocol: getIPTablesProtocol(ip),
+ }
+}
+
+// getIPTablesProtocol returns the IPTables protocol matching the protocol (v4/v6) of the provided IP string
+func getIPTablesProtocol(ip string) iptables.Protocol {
+ if utilnet.IsIPv6String(ip) {
+ return iptables.ProtocolIPv6
+ }
+ return iptables.ProtocolIPv4
+}
+
+// Returns the rule that should be first in the Chain.
+// Packets coming with the controller's "returnMark" should not be evaluated for SNATing.
+// The rule here is a "RETURN" for these packets.
+func (c *Controller) defaultReturnRule(proto iptables.Protocol) nodeipt.Rule {
+ return nodeipt.Rule{
+ Table: "nat",
+ Chain: Chain,
+ Args: []string{
+ "-m", "mark", "--mark", c.returnMark,
+ "-m", "comment", "--comment", "DoNotSNAT",
+ "-j", "RETURN",
+ },
+ Protocol: proto,
+ }
+}
diff --git a/go-controller/pkg/node/controllers/egressservice/endpointslice.go b/go-controller/pkg/node/controllers/egressservice/endpointslice.go
new file mode 100644
index 0000000000..cdd7e3f42d
--- /dev/null
+++ b/go-controller/pkg/node/controllers/egressservice/endpointslice.go
@@ -0,0 +1,86 @@
+package egressservice
+
+import (
+ "errors"
+ "fmt"
+
+ "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/services"
+ discovery "k8s.io/api/discovery/v1"
+ utilruntime "k8s.io/apimachinery/pkg/util/runtime"
+ "k8s.io/client-go/tools/cache"
+ "k8s.io/klog/v2"
+)
+
+/*
+ Pretty much a copy of what the services controller does with endpointslices.
+ The main difference is that we queue an endpointslice's service only if
+ it is already in our local cache of known egress services:
+ if it is not there, it is either not an egress service or it was not reconciled
+ yet; when it is reconciled, the endpoint slice changes will be taken into account.
+*/
+
+func (c *Controller) onEndpointSliceAdd(obj interface{}) {
+ endpointSlice := obj.(*discovery.EndpointSlice)
+ if endpointSlice == nil {
+ utilruntime.HandleError(fmt.Errorf("invalid EndpointSlice provided to onEndpointSliceAdd()"))
+ return
+ }
+ c.queueServiceForEndpointSlice(endpointSlice)
+}
+
+func (c *Controller) onEndpointSliceUpdate(prevObj, obj interface{}) {
+ prevEndpointSlice := prevObj.(*discovery.EndpointSlice)
+ endpointSlice := obj.(*discovery.EndpointSlice)
+
+ // don't process resync or objects that are marked for deletion
+ if prevEndpointSlice.ResourceVersion == endpointSlice.ResourceVersion ||
+ !endpointSlice.GetDeletionTimestamp().IsZero() {
+ return
+ }
+ c.queueServiceForEndpointSlice(endpointSlice)
+}
+
+func (c *Controller) onEndpointSliceDelete(obj interface{}) {
+ endpointSlice, ok := obj.(*discovery.EndpointSlice)
+ if !ok {
+ tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
+ if !ok {
+ utilruntime.HandleError(fmt.Errorf("couldn't get object from tombstone %#v", obj))
+ return
+ }
+ endpointSlice, ok = tombstone.Obj.(*discovery.EndpointSlice)
+ if !ok {
+ utilruntime.HandleError(fmt.Errorf("tombstone contained object that is not an EndpointSlice: %#v", obj))
+ return
+ }
+ }
+
+ if endpointSlice != nil {
+ c.queueServiceForEndpointSlice(endpointSlice)
+ }
+}
+
+func (c *Controller) queueServiceForEndpointSlice(endpointSlice *discovery.EndpointSlice) {
+ key, err := services.ServiceControllerKey(endpointSlice)
+ if err != nil {
+ // Do not log endpointSlices missing service labels as errors.
+ // Once the service label is eventually added, we will get this event
+ // and re-process.
+ if errors.Is(err, services.NoServiceLabelError) {
+ klog.V(5).Infof("EgressService endpoint slice missing service label: %v", err)
+ } else {
+ utilruntime.HandleError(fmt.Errorf("couldn't get key for EndpointSlice %+v: %v", endpointSlice, err))
+ }
+ return
+ }
+ c.Lock()
+ defer c.Unlock()
+ _, cached := c.services[key]
+ if !cached {
+ klog.V(5).Infof("Ignoring update of %s for endpointslice %s/%s as it is not a known egress service",
+ key, endpointSlice.Namespace, endpointSlice.Name)
+ return // we queue a service only if it's in the local caches
+ }
+
+ c.egressServiceQueue.Add(key)
+}
diff --git a/go-controller/pkg/node/controllers/egressservice/service.go b/go-controller/pkg/node/controllers/egressservice/service.go
new file mode 100644
index 0000000000..58f1c0f109
--- /dev/null
+++ b/go-controller/pkg/node/controllers/egressservice/service.go
@@ -0,0 +1,109 @@
+package egressservice
+
+import (
+ "fmt"
+
+ "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util"
+ corev1 "k8s.io/api/core/v1"
+ apierrors "k8s.io/apimachinery/pkg/api/errors"
+ utilruntime "k8s.io/apimachinery/pkg/util/runtime"
+ "k8s.io/client-go/tools/cache"
+ "k8s.io/klog/v2"
+)
+
+func (c *Controller) onServiceAdd(obj interface{}) {
+ key, err := cache.MetaNamespaceKeyFunc(obj)
+ if err != nil {
+ utilruntime.HandleError(fmt.Errorf("couldn't get key for object %+v: %v", obj, err))
+ return
+ }
+
+ service := obj.(*corev1.Service)
+ // We only care about new LoadBalancer services with at least one ingress IP
+ if !util.ServiceTypeHasLoadBalancer(service) || len(service.Status.LoadBalancer.Ingress) == 0 {
+ return
+ }
+
+ es, err := c.egressServiceLister.EgressServices(service.Namespace).Get(service.Name)
+ if err != nil && !apierrors.IsNotFound(err) {
+ // This shouldn't happen, but we queue the service in case we got an unrelated
+ // error when the EgressService exists
+
c.egressServiceQueue.Add(key) + return + } + + // There is no EgressService resource for this service so we don't queue it + if es == nil { + return + } + + klog.V(4).Infof("Adding egress service %s", key) + c.egressServiceQueue.Add(key) +} + +func (c *Controller) onServiceUpdate(oldObj, newObj interface{}) { + oldService := oldObj.(*corev1.Service) + newService := newObj.(*corev1.Service) + + // don't process resync or objects that are marked for deletion + if oldService.ResourceVersion == newService.ResourceVersion || + !newService.GetDeletionTimestamp().IsZero() { + return + } + + // We only care about LoadBalancer service updates that affect egress service functionality + if !util.ServiceTypeHasLoadBalancer(oldService) && !util.ServiceTypeHasLoadBalancer(newService) { + return + } + + key, err := cache.MetaNamespaceKeyFunc(newObj) + if err != nil { + utilruntime.HandleError(fmt.Errorf("couldn't get key for object %+v: %v", newObj, err)) + return + } + + es, err := c.egressServiceLister.EgressServices(newService.Namespace).Get(newService.Name) + if err != nil && !apierrors.IsNotFound(err) { + // This shouldn't happen, but we queue the service in case we got an unrelated + // error when the EgressService exists + c.egressServiceQueue.Add(key) + return + } + + // There is no EgressService resource for this service so we don't queue it + if es == nil { + return + } + + c.egressServiceQueue.Add(key) +} + +func (c *Controller) onServiceDelete(obj interface{}) { + key, err := cache.MetaNamespaceKeyFunc(obj) + if err != nil { + utilruntime.HandleError(fmt.Errorf("couldn't get key for object %+v: %v", obj, err)) + return + } + + service := obj.(*corev1.Service) + // We only care about deletions of LoadBalancer services + if !util.ServiceTypeHasLoadBalancer(service) { + return + } + + klog.V(4).Infof("Deleting egress service %s", key) + es, err := c.egressServiceLister.EgressServices(service.Namespace).Get(service.Name) + if err != nil && !apierrors.IsNotFound(err) { + // This shouldn't happen, but we queue the service in case we got an unrelated + // error when the EgressService exists + c.egressServiceQueue.Add(key) + return + } + + // There is no EgressService resource for this service so we don't queue it + if es == nil { + return + } + + c.egressServiceQueue.Add(key) +} diff --git a/go-controller/pkg/node/default_node_network_controller.go b/go-controller/pkg/node/default_node_network_controller.go index 039240bcb9..dd61bad28a 100644 --- a/go-controller/pkg/node/default_node_network_controller.go +++ b/go-controller/pkg/node/default_node_network_controller.go @@ -29,7 +29,9 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/informer" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/controllers/egressservice" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/controllers/upgrade" + nodeipt "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/iptables" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/healthcheck" retry "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/retry" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" @@ -832,6 +834,20 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error { } } + if config.OVNKubernetesFeature.EnableEgressService { + wf := nc.watchFactory.(*factory.WatchFactory) + c, err := egressservice.NewController(nc.stopChan, ovnKubeNodeSNATMark, nc.name, + 
wf.EgressServiceInformer(), wf.ServiceInformer(), wf.EndpointSliceInformer()) + if err != nil { + return err + } + nc.wg.Add(1) + go func() { + defer nc.wg.Done() + c.Run(1) + }() + } + klog.Infof("Default node network controller initialized and ready.") return nil } @@ -1095,7 +1111,7 @@ func upgradeServiceRoute(routeManager *routeManager, bridgeName string) error { klog.Errorf("Failed to LocalGatewayNATRules: %v", err) } rules := getLocalGatewayNATRules(types.LocalnetGatewayNextHopPort, IPNet) - if err := delIptRules(rules); err != nil { + if err := nodeipt.DelRules(rules); err != nil { klog.Errorf("Failed to LocalGatewayNATRules: %v", err) } } diff --git a/go-controller/pkg/node/egress_service_test.go b/go-controller/pkg/node/egress_service_test.go new file mode 100644 index 0000000000..743bc26c48 --- /dev/null +++ b/go-controller/pkg/node/egress_service_test.go @@ -0,0 +1,512 @@ +package node + +import ( + "context" + "net" + + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + egressserviceapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/controllers/egressservice" + nodeipt "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/iptables" + ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" + util "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/mocks" + "github.com/urfave/cli/v2" + v1 "k8s.io/api/core/v1" + discovery "k8s.io/api/discovery/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +var _ = Describe("Egress Service Operations", func() { + var ( + app *cli.App + fakeOvnNode *FakeOVNNode + fExec *ovntest.FakeExec + iptV4 util.IPTablesHelper + netlinkMock *mocks.NetLinkOps + ) + + origNetlinkInst := util.GetNetLinkOps() + + BeforeEach(func() { + // Restore global default values before each testcase + Expect(config.PrepareTestConfig()).To(Succeed()) + netlinkMock = &mocks.NetLinkOps{} + util.SetNetLinkOpMockInst(netlinkMock) + + app = cli.NewApp() + app.Name = "test" + app.Flags = config.Flags + fExec = ovntest.NewLooseCompareFakeExec() + fakeOvnNode = NewFakeOVNNode(fExec) + + config.OVNKubernetesFeature.EnableEgressService = true + _, cidr4, _ := net.ParseCIDR("10.128.0.0/16") + config.Default.ClusterSubnets = []config.CIDRNetworkEntry{{CIDR: cidr4, HostSubnetLength: 24}} + + iptV4, _ = util.SetFakeIPTablesHelpers() + }) + + AfterEach(func() { + fakeOvnNode.shutdown() + util.SetNetLinkOpMockInst(origNetlinkInst) + config.OVNKubernetesFeature.EnableEgressService = false + }) + + Context("on egress service resource changes", func() { + It("repairs iptables and ip rules when stale entries are present", func() { + app.Action = func(ctx *cli.Context) error { + fakeOvnNode.fakeExec.AddFakeCmd(&ovntest.ExpectedCmd{ + Cmd: "ip -4 --json rule show", + Output: "[{\"priority\":5000,\"src\":\"10.128.0.3\",\"table\":\"wrongTable\"},{\"priority\":5000,\"src\":\"goneEp\",\"table\":\"mynetwork\"},{\"priority\":5000,\"src\":\"10.128.0.3\",\"table\":\"mynetwork\"},{\"priority\":5000,\"src\":\"10.129.0.2\",\"table\":\"mynetwork\"}]", + Err: nil, + }) + fakeOvnNode.fakeExec.AddFakeCmd(&ovntest.ExpectedCmd{ + Cmd: "ip -4 rule del prio 5000 from 10.128.0.3 table wrongTable", + Err: nil, + }) + fakeOvnNode.fakeExec.AddFakeCmd(&ovntest.ExpectedCmd{ + Cmd: "ip -4 rule del prio 5000 from 
goneEp table mynetwork", + Err: nil, + }) + + fakeRules := []nodeipt.Rule{ + { + Table: "nat", + Chain: "OVN-KUBE-EGRESS-SVC", + Args: []string{ + "-m", "mark", "--mark", "0x3f0", + "-m", "comment", "--comment", "DoNotSNAT", + "-j", "RETURN", + }, + }, + { + Table: "nat", + Chain: "OVN-KUBE-EGRESS-SVC", + Args: []string{ + "-s", "10.128.0.3", + "-m", "comment", "--comment", "namespace1/service1", + "-j", "SNAT", + "--to-source", "5.5.5.5", + }, + Protocol: getIPTablesProtocol("5.5.5.5"), + }, + { + Table: "nat", + Chain: "OVN-KUBE-EGRESS-SVC", + Args: []string{ + "-s", "10.128.0.88", // gone ep + "-m", "comment", "--comment", "namespace1/service1", + "-j", "SNAT", + "--to-source", "5.5.5.5", + }, + Protocol: getIPTablesProtocol("5.5.5.5"), + }, + { + Table: "nat", + Chain: "OVN-KUBE-EGRESS-SVC", + Args: []string{ + "-s", "10.128.0.3", + "-m", "comment", "--comment", "namespace1/service1", + "-j", "SNAT", + "--to-source", "5.200.5.12", // wrong lb + }, + Protocol: getIPTablesProtocol("5.5.5.5"), + }, + { + Table: "nat", + Chain: "OVN-KUBE-EGRESS-SVC", + Args: []string{ + "-s", "10.128.0.3", + "-m", "comment", "--comment", "namespace13service6", // gone service + "-j", "SNAT", + "--to-source", "1.2.3.4", + }, + Protocol: getIPTablesProtocol("5.5.5.5"), + }, + } + Expect(appendIptRules(fakeRules)).To(Succeed()) + epPortName := "https" + epPortValue := int32(443) + + egressService := egressserviceapi.EgressService{ + ObjectMeta: metav1.ObjectMeta{ + Name: "service1", + Namespace: "namespace1", + }, + Spec: egressserviceapi.EgressServiceSpec{ + Network: "mynetwork", + }, + Status: egressserviceapi.EgressServiceStatus{ + Host: fakeNodeName, + }, + } + service := *newService("service1", "namespace1", "10.129.0.2", + []v1.ServicePort{ + { + NodePort: int32(31111), + Protocol: v1.ProtocolTCP, + Port: int32(8080), + }, + }, + v1.ServiceTypeLoadBalancer, + []string{}, + v1.ServiceStatus{ + LoadBalancer: v1.LoadBalancerStatus{ + Ingress: []v1.LoadBalancerIngress{{ + IP: "5.5.5.5", + }}, + }, + }, + false, false, + ) + + ep1 := discovery.Endpoint{ + Addresses: []string{"10.128.0.3"}, + } + epPort := discovery.EndpointPort{ + Name: &epPortName, + Port: &epPortValue, + } + + // host-networked endpoint, should not have an SNAT rule created + ep2 := discovery.Endpoint{ + Addresses: []string{"192.168.18.15"}, + NodeName: &fakeNodeName, + } + // endpointSlice.Endpoints is ovn-networked so this will + // come under !hasLocalHostNetEp case + endpointSlice := *newEndpointSlice( + "service1", + "namespace1", + []discovery.Endpoint{ep1, ep2}, + []discovery.EndpointPort{epPort}) + + fakeOvnNode.start(ctx, + &v1.ServiceList{ + Items: []v1.Service{ + service, + }, + }, + &discovery.EndpointSliceList{ + Items: []discovery.EndpointSlice{ + endpointSlice, + }, + }, + &egressserviceapi.EgressServiceList{ + Items: []egressserviceapi.EgressService{ + egressService, + }, + }, + ) + + wf := fakeOvnNode.watcher.(*factory.WatchFactory) + c, err := egressservice.NewController(fakeOvnNode.stopChan, ovnKubeNodeSNATMark, fakeOvnNode.nc.name, + wf.EgressServiceInformer(), wf.ServiceInformer(), wf.EndpointSliceInformer()) + Expect(err).ToNot(HaveOccurred()) + fakeOvnNode.wg.Add(1) + go func() { + defer fakeOvnNode.wg.Done() + c.Run(1) + }() + + expectedTables := map[string]util.FakeTable{ + "nat": { + "OVN-KUBE-EGRESS-SVC": []string{ + "-m mark --mark 0x3f0 -m comment --comment DoNotSNAT -j RETURN", + "-s 10.128.0.3 -m comment --comment namespace1/service1 -j SNAT --to-source 5.5.5.5", + }, + }, + "filter": {}, + "mangle": {}, + } 
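+ // After the repair, only the DoNotSNAT return rule and the SNAT rule for
+ // the still-valid endpoint should remain in the chain; the stale SNAT
+ // entries and the ip rules with a wrong table or unknown src should be gone.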
+ + f4 := iptV4.(*util.FakeIPTables) + Eventually(func() error { + return f4.MatchState(expectedTables) + }).ShouldNot(HaveOccurred()) + + Expect(fakeOvnNode.fakeExec.CalledMatchesExpected()).To(BeTrue(), fakeOvnNode.fakeExec.ErrorDesc) + + return nil + } + err := app.Run([]string{app.Name}) + Expect(err).NotTo(HaveOccurred()) + }) + It("manages iptables rules for LoadBalancer egress service backed by cluster networked pods", func() { + app.Action = func(ctx *cli.Context) error { + fakeOvnNode.fakeExec.AddFakeCmd(&ovntest.ExpectedCmd{ + Cmd: "ip -4 --json rule show", + Output: "[]", + Err: nil, + }) + + epPortName := "https" + epPortValue := int32(443) + + egressService := egressserviceapi.EgressService{ + ObjectMeta: metav1.ObjectMeta{ + Name: "service1", + Namespace: "namespace1", + }, + Status: egressserviceapi.EgressServiceStatus{ + Host: fakeNodeName, + }, + } + service := *newService("service1", "namespace1", "10.129.0.2", + []v1.ServicePort{ + { + NodePort: int32(31111), + Protocol: v1.ProtocolTCP, + Port: int32(8080), + }, + }, + v1.ServiceTypeLoadBalancer, + []string{}, + v1.ServiceStatus{ + LoadBalancer: v1.LoadBalancerStatus{ + Ingress: []v1.LoadBalancerIngress{{ + IP: "5.5.5.5", + }}, + }, + }, + false, false, + ) + + ep1 := discovery.Endpoint{ + Addresses: []string{"10.128.0.3"}, + } + epPort := discovery.EndpointPort{ + Name: &epPortName, + Port: &epPortValue, + } + + // host-networked endpoint, should not have an SNAT rule created + ep2 := discovery.Endpoint{ + Addresses: []string{"192.168.18.15"}, + NodeName: &fakeNodeName, + } + // endpointSlice.Endpoints is ovn-networked so this will + // come under !hasLocalHostNetEp case + endpointSlice := *newEndpointSlice( + "service1", + "namespace1", + []discovery.Endpoint{ep1, ep2}, + []discovery.EndpointPort{epPort}) + + fakeOvnNode.start(ctx, + &v1.ServiceList{ + Items: []v1.Service{ + service, + }, + }, + &discovery.EndpointSliceList{ + Items: []discovery.EndpointSlice{ + endpointSlice, + }, + }, + &egressserviceapi.EgressServiceList{ + Items: []egressserviceapi.EgressService{ + egressService, + }, + }, + ) + + wf := fakeOvnNode.watcher.(*factory.WatchFactory) + c, err := egressservice.NewController(fakeOvnNode.stopChan, ovnKubeNodeSNATMark, fakeOvnNode.nc.name, + wf.EgressServiceInformer(), wf.ServiceInformer(), wf.EndpointSliceInformer()) + Expect(err).ToNot(HaveOccurred()) + fakeOvnNode.wg.Add(1) + go func() { + defer fakeOvnNode.wg.Done() + c.Run(1) + }() + + expectedTables := map[string]util.FakeTable{ + "nat": { + "OVN-KUBE-EGRESS-SVC": []string{ + "-m mark --mark 0x3f0 -m comment --comment DoNotSNAT -j RETURN", + "-s 10.128.0.3 -m comment --comment namespace1/service1 -j SNAT --to-source 5.5.5.5", + }, + }, + "filter": {}, + "mangle": {}, + } + + f4 := iptV4.(*util.FakeIPTables) + Eventually(func() error { + return f4.MatchState(expectedTables) + }).ShouldNot(HaveOccurred()) + + expectedTables = map[string]util.FakeTable{ + "nat": { + "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment DoNotSNAT -j RETURN"}, + }, + "filter": {}, + "mangle": {}, + } + + err = fakeOvnNode.fakeClient.EgressServiceClient.K8sV1().EgressServices("namespace1").Delete(context.TODO(), "service1", metav1.DeleteOptions{}) + Expect(err).ToNot(HaveOccurred()) + + Eventually(func() error { + return f4.MatchState(expectedTables) + }).ShouldNot(HaveOccurred()) + + Expect(fakeOvnNode.fakeExec.CalledMatchesExpected()).To(BeTrue(), fakeOvnNode.fakeExec.ErrorDesc) + + return nil + } + err := app.Run([]string{app.Name}) + 
Expect(err).NotTo(HaveOccurred()) + }) + + It("manages iptables/ip rules for LoadBalancer egress service backed by ovn-k pods with Network", func() { + app.Action = func(ctx *cli.Context) error { + fakeOvnNode.fakeExec.AddFakeCmd(&ovntest.ExpectedCmd{ + Cmd: "ip -4 --json rule show", + Output: "[]", + Err: nil, + }) + fakeOvnNode.fakeExec.AddFakeCmd(&ovntest.ExpectedCmd{ + Cmd: "ip -4 rule add prio 5000 from 10.129.0.2 table mynetwork", + Err: nil, + }) + fakeOvnNode.fakeExec.AddFakeCmd(&ovntest.ExpectedCmd{ + Cmd: "ip -4 rule add prio 5000 from 10.128.0.3 table mynetwork", + Err: nil, + }) + fakeOvnNode.fakeExec.AddFakeCmd(&ovntest.ExpectedCmd{ + Cmd: "ip -4 rule del prio 5000 from 10.129.0.2 table mynetwork", + Err: nil, + }) + fakeOvnNode.fakeExec.AddFakeCmd(&ovntest.ExpectedCmd{ + Cmd: "ip -4 rule del prio 5000 from 10.128.0.3 table mynetwork", + Err: nil, + }) + epPortName := "https" + epPortValue := int32(443) + + egressService := egressserviceapi.EgressService{ + ObjectMeta: metav1.ObjectMeta{ + Name: "service1", + Namespace: "namespace1", + }, + Spec: egressserviceapi.EgressServiceSpec{ + Network: "mynetwork", + }, + Status: egressserviceapi.EgressServiceStatus{ + Host: fakeNodeName, + }, + } + + service := *newService("service1", "namespace1", "10.129.0.2", + []v1.ServicePort{ + { + NodePort: int32(31111), + Protocol: v1.ProtocolTCP, + Port: int32(8080), + }, + }, + v1.ServiceTypeLoadBalancer, + []string{}, + v1.ServiceStatus{ + LoadBalancer: v1.LoadBalancerStatus{ + Ingress: []v1.LoadBalancerIngress{{ + IP: "5.5.5.5", + }}, + }, + }, + false, false, + ) + + ep1 := discovery.Endpoint{ + Addresses: []string{"10.128.0.3"}, + } + epPort := discovery.EndpointPort{ + Name: &epPortName, + Port: &epPortValue, + } + + // host-networked endpoint, should not have an SNAT rule created + ep2 := discovery.Endpoint{ + Addresses: []string{"192.168.18.15"}, + NodeName: &fakeNodeName, + } + // endpointSlice.Endpoints is ovn-networked so this will + // come under !hasLocalHostNetEp case + endpointSlice := *newEndpointSlice( + "service1", + "namespace1", + []discovery.Endpoint{ep1, ep2}, + []discovery.EndpointPort{epPort}) + + fakeOvnNode.start(ctx, + &v1.ServiceList{ + Items: []v1.Service{ + service, + }, + }, + &discovery.EndpointSliceList{ + Items: []discovery.EndpointSlice{ + endpointSlice, + }, + }, + &egressserviceapi.EgressServiceList{ + Items: []egressserviceapi.EgressService{ + egressService, + }, + }, + ) + + wf := fakeOvnNode.watcher.(*factory.WatchFactory) + c, err := egressservice.NewController(fakeOvnNode.stopChan, ovnKubeNodeSNATMark, fakeOvnNode.nc.name, + wf.EgressServiceInformer(), wf.ServiceInformer(), wf.EndpointSliceInformer()) + Expect(err).ToNot(HaveOccurred()) + fakeOvnNode.wg.Add(1) + go func() { + defer fakeOvnNode.wg.Done() + c.Run(1) + }() + + expectedTables := map[string]util.FakeTable{ + "nat": { + "OVN-KUBE-EGRESS-SVC": []string{ + "-m mark --mark 0x3f0 -m comment --comment DoNotSNAT -j RETURN", + "-s 10.128.0.3 -m comment --comment namespace1/service1 -j SNAT --to-source 5.5.5.5", + }, + }, + "filter": {}, + "mangle": {}, + } + f4 := iptV4.(*util.FakeIPTables) + Eventually(func() error { + return f4.MatchState(expectedTables) + }).ShouldNot(HaveOccurred()) + + expectedTables = map[string]util.FakeTable{ + "nat": { + "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment DoNotSNAT -j RETURN"}, + }, + "filter": {}, + "mangle": {}, + } + + err = fakeOvnNode.fakeClient.EgressServiceClient.K8sV1().EgressServices("namespace1").Delete(context.TODO(), 
"service1", metav1.DeleteOptions{}) + Expect(err).ToNot(HaveOccurred()) + + Eventually(func() error { + return f4.MatchState(expectedTables) + }).ShouldNot(HaveOccurred()) + + Expect(fakeOvnNode.fakeExec.CalledMatchesExpected()).To(BeTrue(), fakeOvnNode.fakeExec.ErrorDesc) + return nil + } + err := app.Run([]string{app.Name}) + Expect(err).NotTo(HaveOccurred()) + }) + }) +}) diff --git a/go-controller/pkg/node/gateway_init_linux_test.go b/go-controller/pkg/node/gateway_init_linux_test.go index 5e19a2eeb5..e62ea12e24 100644 --- a/go-controller/pkg/node/gateway_init_linux_test.go +++ b/go-controller/pkg/node/gateway_init_linux_test.go @@ -358,7 +358,7 @@ func shareGatewayInterfaceTest(app *cli.App, testNS ns.NetNS, "OVN-KUBE-SNAT-MGMTPORT": []string{}, "OVN-KUBE-ETP": []string{}, "OVN-KUBE-ITP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, + "OVN-KUBE-EGRESS-SVC": []string{}, }, "filter": {}, "mangle": { @@ -1144,7 +1144,7 @@ OFPT_GET_CONFIG_REPLY (xid=0x4): frags=normal miss_send_len=0`, "OVN-KUBE-SNAT-MGMTPORT": []string{}, "OVN-KUBE-ETP": []string{}, "OVN-KUBE-ITP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, + "OVN-KUBE-EGRESS-SVC": []string{}, }, "filter": { "FORWARD": []string{ diff --git a/go-controller/pkg/node/gateway_iptables.go b/go-controller/pkg/node/gateway_iptables.go index 17eba5ef5c..360b23fd05 100644 --- a/go-controller/pkg/node/gateway_iptables.go +++ b/go-controller/pkg/node/gateway_iptables.go @@ -6,16 +6,15 @@ package node import ( "fmt" "net" - "strings" "github.com/coreos/go-iptables/iptables" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/controllers/egressservice" + nodeipt "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/iptables" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" - "github.com/pkg/errors" kapi "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/util/errors" - "k8s.io/client-go/tools/cache" "k8s.io/klog/v2" utilnet "k8s.io/utils/net" ) @@ -25,7 +24,6 @@ const ( iptableExternalIPChain = "OVN-KUBE-EXTERNALIP" // called from nat-PREROUTING and nat-OUTPUT iptableETPChain = "OVN-KUBE-ETP" // called from nat-PREROUTING only iptableITPChain = "OVN-KUBE-ITP" // called from mangle-OUTPUT and nat-OUTPUT - iptableESVCChain = "OVN-KUBE-EGRESS-SVC" // called from nat-POSTROUTING ) func clusterIPTablesProtocols() []iptables.Protocol { @@ -55,126 +53,56 @@ func getMasqueradeVIP(ip string) string { return types.V4HostETPLocalMasqueradeIP } -type iptRule struct { - table string - chain string - args []string - protocol iptables.Protocol -} - -func addIptRules(rules []iptRule, append bool) error { - addErrors := errors.New("") - var err error - var ipt util.IPTablesHelper - var exists bool - for _, r := range rules { - klog.V(5).Infof("Adding rule in table: %s, chain: %s with args: \"%s\" for protocol: %v ", - r.table, r.chain, strings.Join(r.args, " "), r.protocol) - if ipt, err = util.GetIPTablesHelper(r.protocol); err != nil { - addErrors = errors.Wrapf(addErrors, - "Failed to add iptables %s/%s rule %q: %v", r.table, r.chain, strings.Join(r.args, " "), err) - continue - } - if err = ipt.NewChain(r.table, r.chain); err != nil { - klog.V(5).Infof("Chain: \"%s\" in table: \"%s\" already exists, skipping creation: %v", - r.chain, r.table, err) - } - 
exists, err = ipt.Exists(r.table, r.chain, r.args...) - if !exists && err == nil { - if append { - err = ipt.Append(r.table, r.chain, r.args...) - } else { - err = ipt.Insert(r.table, r.chain, 1, r.args...) - } - } - if err != nil { - addErrors = errors.Wrapf(addErrors, "failed to add iptables %s/%s rule %q: %v", - r.table, r.chain, strings.Join(r.args, " "), err) - } - } - if addErrors.Error() == "" { - addErrors = nil - } - return addErrors -} - // insertIptRules adds the provided rules in an insert fashion // i.e each rule gets added at the first position in the chain -func insertIptRules(rules []iptRule) error { - return addIptRules(rules, false) +func insertIptRules(rules []nodeipt.Rule) error { + return nodeipt.AddRules(rules, false) } // appendIptRules adds the provided rules in an append fashion // i.e each rule gets added at the last position in the chain -func appendIptRules(rules []iptRule) error { - return addIptRules(rules, true) +func appendIptRules(rules []nodeipt.Rule) error { + return nodeipt.AddRules(rules, true) } -func delIptRules(rules []iptRule) error { - delErrors := errors.New("") - var err error - var ipt util.IPTablesHelper - for _, r := range rules { - klog.V(5).Infof("Deleting rule in table: %s, chain: %s with args: \"%s\" for protocol: %v ", - r.table, r.chain, strings.Join(r.args, " "), r.protocol) - if ipt, err = util.GetIPTablesHelper(r.protocol); err != nil { - delErrors = errors.Wrapf(delErrors, - "Failed to delete iptables %s/%s rule %q: %v", r.table, r.chain, strings.Join(r.args, " "), err) - continue - } - if exists, err := ipt.Exists(r.table, r.chain, r.args...); err == nil && exists { - err := ipt.Delete(r.table, r.chain, r.args...) - if err != nil { - delErrors = errors.Wrapf(delErrors, "failed to delete iptables %s/%s rule %q: %v", - r.table, r.chain, strings.Join(r.args, " "), err) - } - } - } - if delErrors.Error() == "" { - delErrors = nil - } - return delErrors -} - -func getGatewayInitRules(chain string, proto iptables.Protocol) []iptRule { - iptRules := []iptRule{} - if chain == iptableESVCChain { - return []iptRule{ +func getGatewayInitRules(chain string, proto iptables.Protocol) []nodeipt.Rule { + iptRules := []nodeipt.Rule{} + if chain == egressservice.Chain { + return []nodeipt.Rule{ { - table: "nat", - chain: "POSTROUTING", - args: []string{"-j", chain}, - protocol: proto, + Table: "nat", + Chain: "POSTROUTING", + Args: []string{"-j", chain}, + Protocol: proto, }, - egressSVCIPTDefaultReturnRule(), } } if chain == iptableITPChain { iptRules = append(iptRules, - iptRule{ - table: "mangle", - chain: "OUTPUT", - args: []string{"-j", chain}, - protocol: proto, + nodeipt.Rule{ + Table: "mangle", + Chain: "OUTPUT", + Args: []string{"-j", chain}, + Protocol: proto, }, ) } else { iptRules = append(iptRules, - iptRule{ - table: "nat", - chain: "PREROUTING", - args: []string{"-j", chain}, - protocol: proto, + nodeipt.Rule{ + Table: "nat", + Chain: "PREROUTING", + Args: []string{"-j", chain}, + Protocol: proto, }, ) } if chain != iptableETPChain { // ETP chain only meant for external traffic iptRules = append(iptRules, - iptRule{ - table: "nat", - chain: "OUTPUT", - args: []string{"-j", chain}, - protocol: proto, + nodeipt.Rule{ + Table: "nat", + Chain: "OUTPUT", + Args: []string{"-j", chain}, + Protocol: proto, }, ) } @@ -191,18 +119,18 @@ func getGatewayInitRules(chain string, proto iptables.Protocol) []iptRule { // // `svcHasLocalHostNetEndPnt` is true if this service has at least one host-networked endpoint that is local to this node // 
`isETPLocal` is true if the svc.Spec.ExternalTrafficPolicy=Local -func getNodePortIPTRules(svcPort kapi.ServicePort, targetIP string, targetPort int32, svcHasLocalHostNetEndPnt, isETPLocal bool) []iptRule { +func getNodePortIPTRules(svcPort kapi.ServicePort, targetIP string, targetPort int32, svcHasLocalHostNetEndPnt, isETPLocal bool) []nodeipt.Rule { chainName := iptableNodePortChain if !svcHasLocalHostNetEndPnt && isETPLocal { // DNAT it to the masqueradeIP:nodePort instead of clusterIP:targetPort targetIP = getMasqueradeVIP(targetIP) chainName = iptableETPChain } - return []iptRule{ + return []nodeipt.Rule{ { - table: "nat", - chain: chainName, - args: []string{ + Table: "nat", + Chain: chainName, + Args: []string{ "-p", string(svcPort.Protocol), "-m", "addrtype", "--dst-type", "LOCAL", @@ -210,7 +138,7 @@ func getNodePortIPTRules(svcPort kapi.ServicePort, targetIP string, targetPort i "-j", "DNAT", "--to-destination", util.JoinHostPortInt32(targetIP, targetPort), }, - protocol: getIPTablesProtocol(targetIP), + Protocol: getIPTablesProtocol(targetIP), }, } } @@ -220,35 +148,35 @@ func getNodePortIPTRules(svcPort kapi.ServicePort, targetIP string, targetPort i // `clusterIP` is clusterIP is the VIP of the service to match on // `svcHasLocalHostNetEndPnt` is true if this service has at least one host-networked endpoint that is local to this node // NOTE: Currently invoked only for Internal Traffic Policy -func getITPLocalIPTRules(svcPort kapi.ServicePort, clusterIP string, svcHasLocalHostNetEndPnt bool) []iptRule { +func getITPLocalIPTRules(svcPort kapi.ServicePort, clusterIP string, svcHasLocalHostNetEndPnt bool) []nodeipt.Rule { if svcHasLocalHostNetEndPnt { - return []iptRule{ + return []nodeipt.Rule{ { - table: "nat", - chain: iptableITPChain, - args: []string{ + Table: "nat", + Chain: iptableITPChain, + Args: []string{ "-p", string(svcPort.Protocol), "-d", clusterIP, "--dport", fmt.Sprintf("%v", svcPort.Port), "-j", "REDIRECT", "--to-port", fmt.Sprintf("%v", int32(svcPort.TargetPort.IntValue())), }, - protocol: getIPTablesProtocol(clusterIP), + Protocol: getIPTablesProtocol(clusterIP), }, } } - return []iptRule{ + return []nodeipt.Rule{ { - table: "mangle", - chain: iptableITPChain, - args: []string{ + Table: "mangle", + Chain: iptableITPChain, + Args: []string{ "-p", string(svcPort.Protocol), "-d", string(clusterIP), "--dport", fmt.Sprintf("%d", svcPort.Port), "-j", "MARK", "--set-xmark", string(ovnkubeITPMark), }, - protocol: getIPTablesProtocol(clusterIP), + Protocol: getIPTablesProtocol(clusterIP), }, } } @@ -257,17 +185,17 @@ func getITPLocalIPTRules(svcPort kapi.ServicePort, clusterIP string, svcHasLocal // `svcPort` corresponds to port details for this service as specified in the service object // `targetIP` corresponds to svc.spec.ClusterIP // This function returns a RETURN rule in iptableMgmPortChain to prevent SNAT of sourceIP -func getNodePortETPLocalIPTRules(svcPort kapi.ServicePort, targetIP string) []iptRule { - return []iptRule{ +func getNodePortETPLocalIPTRules(svcPort kapi.ServicePort, targetIP string) []nodeipt.Rule { + return []nodeipt.Rule{ { - table: "nat", - chain: iptableMgmPortChain, - args: []string{ + Table: "nat", + Chain: iptableMgmPortChain, + Args: []string{ "-p", string(svcPort.Protocol), "--dport", fmt.Sprintf("%d", svcPort.NodePort), "-j", "RETURN", }, - protocol: getIPTablesProtocol(targetIP), + Protocol: getIPTablesProtocol(targetIP), }, } } @@ -276,8 +204,8 @@ func computeProbability(n, i int) string { return fmt.Sprintf("%0.10f", 
1.0/float64(n-i+1)) } -func generateIPTRulesForLoadBalancersWithoutNodePorts(svcPort kapi.ServicePort, externalIP string, service *kapi.Service, localEndpoints []string) []iptRule { - var iptRules []iptRule +func generateIPTRulesForLoadBalancersWithoutNodePorts(svcPort kapi.ServicePort, externalIP string, service *kapi.Service, localEndpoints []string) []nodeipt.Rule { + var iptRules []nodeipt.Rule if len(localEndpoints) == 0 { // either its smart nic mode; etp&itp not implemented, OR // fetching endpointSlices error-ed out prior to reaching here so nothing to do @@ -285,11 +213,11 @@ func generateIPTRulesForLoadBalancersWithoutNodePorts(svcPort kapi.ServicePort, } numLocalEndpoints := len(localEndpoints) for i, ip := range localEndpoints { - iptRules = append([]iptRule{ + iptRules = append([]nodeipt.Rule{ { - table: "nat", - chain: iptableETPChain, - args: []string{ + Table: "nat", + Chain: iptableETPChain, + Args: []string{ "-p", string(svcPort.Protocol), "-d", externalIP, "--dport", fmt.Sprintf("%v", svcPort.Port), @@ -299,18 +227,18 @@ func generateIPTRulesForLoadBalancersWithoutNodePorts(svcPort kapi.ServicePort, "--mode", "random", "--probability", computeProbability(numLocalEndpoints, i+1), }, - protocol: getIPTablesProtocol(externalIP), + Protocol: getIPTablesProtocol(externalIP), }, { - table: "nat", - chain: iptableMgmPortChain, - args: []string{ + Table: "nat", + Chain: iptableMgmPortChain, + Args: []string{ "-p", string(svcPort.Protocol), "-d", ip, "--dport", fmt.Sprintf("%v", int32(svcPort.TargetPort.IntValue())), "-j", "RETURN", }, - protocol: getIPTablesProtocol(externalIP), + Protocol: getIPTablesProtocol(externalIP), }, }, iptRules...) } @@ -327,7 +255,7 @@ func generateIPTRulesForLoadBalancersWithoutNodePorts(svcPort kapi.ServicePort, // // `svcHasLocalHostNetEndPnt` is true if this service has at least one host-networked endpoint that is local to this node // `isETPLocal` is true if the svc.Spec.ExternalTrafficPolicy=Local -func getExternalIPTRules(svcPort kapi.ServicePort, externalIP, dstIP string, svcHasLocalHostNetEndPnt, isETPLocal bool) []iptRule { +func getExternalIPTRules(svcPort kapi.ServicePort, externalIP, dstIP string, svcHasLocalHostNetEndPnt, isETPLocal bool) []nodeipt.Rule { targetPort := svcPort.Port chainName := iptableExternalIPChain if !svcHasLocalHostNetEndPnt && isETPLocal { @@ -336,89 +264,89 @@ func getExternalIPTRules(svcPort kapi.ServicePort, externalIP, dstIP string, svc targetPort = svcPort.NodePort chainName = iptableETPChain } - return []iptRule{ + return []nodeipt.Rule{ { - table: "nat", - chain: chainName, - args: []string{ + Table: "nat", + Chain: chainName, + Args: []string{ "-p", string(svcPort.Protocol), "-d", externalIP, "--dport", fmt.Sprintf("%v", svcPort.Port), "-j", "DNAT", "--to-destination", util.JoinHostPortInt32(dstIP, targetPort), }, - protocol: getIPTablesProtocol(externalIP), + Protocol: getIPTablesProtocol(externalIP), }, } } -func getGatewayForwardRules(svcCIDR *net.IPNet) []iptRule { +func getGatewayForwardRules(svcCIDR *net.IPNet) []nodeipt.Rule { protocol := getIPTablesProtocol(svcCIDR.IP.String()) masqueradeIP := types.V4OVNMasqueradeIP if protocol == iptables.ProtocolIPv6 { masqueradeIP = types.V6OVNMasqueradeIP } - return []iptRule{ + return []nodeipt.Rule{ { - table: "filter", - chain: "FORWARD", - args: []string{ + Table: "filter", + Chain: "FORWARD", + Args: []string{ "-s", svcCIDR.String(), "-j", "ACCEPT", }, - protocol: protocol, + Protocol: protocol, }, { - table: "filter", - chain: "FORWARD", - args: []string{ 
+ Table: "filter", + Chain: "FORWARD", + Args: []string{ "-d", svcCIDR.String(), "-j", "ACCEPT", }, - protocol: protocol, + Protocol: protocol, }, { - table: "filter", - chain: "FORWARD", - args: []string{ + Table: "filter", + Chain: "FORWARD", + Args: []string{ "-s", masqueradeIP, "-j", "ACCEPT", }, - protocol: protocol, + Protocol: protocol, }, { - table: "filter", - chain: "FORWARD", - args: []string{ + Table: "filter", + Chain: "FORWARD", + Args: []string{ "-d", masqueradeIP, "-j", "ACCEPT", }, - protocol: protocol, + Protocol: protocol, }, } } -func getGatewayDropRules(ifName string) []iptRule { - var dropRules []iptRule +func getGatewayDropRules(ifName string) []nodeipt.Rule { + var dropRules []nodeipt.Rule for _, protocol := range clusterIPTablesProtocols() { - dropRules = append(dropRules, []iptRule{ + dropRules = append(dropRules, []nodeipt.Rule{ { - table: "filter", - chain: "FORWARD", - args: []string{ + Table: "filter", + Chain: "FORWARD", + Args: []string{ "-i", ifName, "-j", "DROP", }, - protocol: protocol, + Protocol: protocol, }, { - table: "filter", - chain: "FORWARD", - args: []string{ + Table: "filter", + Chain: "FORWARD", + Args: []string{ "-o", ifName, "-j", "DROP", }, - protocol: protocol, + Protocol: protocol, }, }...) } @@ -442,60 +370,60 @@ func initExternalBridgeDropForwardingRules(ifName string) error { return appendIptRules(getGatewayDropRules(ifName)) } -func getLocalGatewayNATRules(ifname string, cidr *net.IPNet) []iptRule { +func getLocalGatewayNATRules(ifname string, cidr *net.IPNet) []nodeipt.Rule { // Allow packets to/from the gateway interface in case defaults deny protocol := getIPTablesProtocol(cidr.IP.String()) masqueradeIP := types.V4OVNMasqueradeIP if protocol == iptables.ProtocolIPv6 { masqueradeIP = types.V6OVNMasqueradeIP } - return []iptRule{ + return []nodeipt.Rule{ { - table: "filter", - chain: "FORWARD", - args: []string{ + Table: "filter", + Chain: "FORWARD", + Args: []string{ "-o", ifname, "-m", "conntrack", "--ctstate", "RELATED,ESTABLISHED", "-j", "ACCEPT", }, - protocol: protocol, + Protocol: protocol, }, { - table: "filter", - chain: "FORWARD", - args: []string{ + Table: "filter", + Chain: "FORWARD", + Args: []string{ "-i", ifname, "-j", "ACCEPT", }, - protocol: protocol, + Protocol: protocol, }, { - table: "filter", - chain: "INPUT", - args: []string{ + Table: "filter", + Chain: "INPUT", + Args: []string{ "-i", ifname, "-m", "comment", "--comment", "from OVN to localhost", "-j", "ACCEPT", }, - protocol: protocol, + Protocol: protocol, }, { - table: "nat", - chain: "POSTROUTING", - args: []string{ + Table: "nat", + Chain: "POSTROUTING", + Args: []string{ "-s", masqueradeIP, "-j", "MASQUERADE", }, - protocol: protocol, + Protocol: protocol, }, { - table: "nat", - chain: "POSTROUTING", - args: []string{ + Table: "nat", + Chain: "POSTROUTING", + Args: []string{ "-s", cidr.String(), "-j", "MASQUERADE", }, - protocol: protocol, + Protocol: protocol, }, } } @@ -512,10 +440,10 @@ func addChaintoTable(ipt util.IPTablesHelper, tableName, chain string) { } } -func handleGatewayIPTables(iptCallback func(rules []iptRule) error, genGatewayChainRules func(chain string, proto iptables.Protocol) []iptRule) error { - rules := make([]iptRule, 0) +func handleGatewayIPTables(iptCallback func(rules []nodeipt.Rule) error, genGatewayChainRules func(chain string, proto iptables.Protocol) []nodeipt.Rule) error { + rules := make([]nodeipt.Rule, 0) // (NOTE: Order is important, add jump to iptableETPChain before jump to NP/EIP chains) - for _, chain := range 
[]string{iptableITPChain, iptableESVCChain, iptableNodePortChain, iptableExternalIPChain, iptableETPChain} { + for _, chain := range []string{iptableITPChain, egressservice.Chain, iptableNodePortChain, iptableExternalIPChain, iptableETPChain} { for _, proto := range clusterIPTablesProtocols() { ipt, err := util.GetIPTablesHelper(proto) if err != nil { @@ -562,7 +490,7 @@ func cleanupSharedGatewayIPTChains() { } } -func recreateIPTRules(table, chain string, keepIPTRules []iptRule) error { +func recreateIPTRules(table, chain string, keepIPTRules []nodeipt.Rule) error { var errors []error var err error var ipt util.IPTablesHelper @@ -572,7 +500,7 @@ func recreateIPTRules(table, chain string, keepIPTRules []iptRule) error { continue } if err = ipt.ClearChain(table, chain); err != nil { - errors = append(errors, fmt.Errorf("error clearing chain: %s in table: %s, err: %v", chain, table, err)) + errors = append(errors, fmt.Errorf("error clearing Chain: %s in Table: %s, err: %v", chain, table, err)) } } if err = insertIptRules(keepIPTRules); err != nil { @@ -590,8 +518,8 @@ func recreateIPTRules(table, chain string, keepIPTRules []iptRule) error { // case3: if svcHasLocalHostNetEndPnt and svcTypeIsITPLocal, rule that redirects clusterIP traffic to host targetPort is added. // // if !svcHasLocalHostNetEndPnt and svcTypeIsITPLocal, rule that marks clusterIP traffic to steer it to ovn-k8s-mp0 is added. -func getGatewayIPTRules(service *kapi.Service, localEndpoints []string, svcHasLocalHostNetEndPnt bool) []iptRule { - rules := make([]iptRule, 0) +func getGatewayIPTRules(service *kapi.Service, localEndpoints []string, svcHasLocalHostNetEndPnt bool) []nodeipt.Rule { + rules := make([]nodeipt.Rule, 0) clusterIPs := util.GetClusterIPs(service) svcTypeIsETPLocal := util.ServiceExternalTrafficPolicyLocal(service) svcTypeIsITPLocal := util.ServiceInternalTrafficPolicyLocal(service) @@ -654,46 +582,3 @@ func getGatewayIPTRules(service *kapi.Service, localEndpoints []string, svcHasLo } return rules } - -// Returns all of the SNAT rules that should be created for an egress service with the given endpoints. 
-func egressSVCIPTRulesForEndpoints(svc *kapi.Service, v4Eps, v6Eps []string) []iptRule { - rules := []iptRule{} - - comment, _ := cache.MetaNamespaceKeyFunc(svc) - for _, lb := range svc.Status.LoadBalancer.Ingress { - lbIPStr := utilnet.ParseIPSloppy(lb.IP).String() - lbProto := getIPTablesProtocol(lbIPStr) - epsForProto := v4Eps - if lbProto == iptables.ProtocolIPv6 { - epsForProto = v6Eps - } - - for _, ep := range epsForProto { - rules = append(rules, iptRule{ - table: "nat", - chain: iptableESVCChain, - args: []string{ - "-s", ep, - "-m", "comment", "--comment", comment, - "-j", "SNAT", - "--to-source", lbIPStr, - }, - protocol: lbProto, - }) - } - } - - return rules -} - -func egressSVCIPTDefaultReturnRule() iptRule { - return iptRule{ - table: "nat", - chain: iptableESVCChain, - args: []string{ - "-m", "mark", "--mark", string(ovnKubeNodeSNATMark), - "-m", "comment", "--comment", "Do not SNAT to SVC VIP", - "-j", "RETURN", - }, - } -} diff --git a/go-controller/pkg/node/gateway_localnet.go b/go-controller/pkg/node/gateway_localnet.go index b8fb36936b..9c92a0c06f 100644 --- a/go-controller/pkg/node/gateway_localnet.go +++ b/go-controller/pkg/node/gateway_localnet.go @@ -124,7 +124,7 @@ func newLocalGateway(nodeName string, hostSubnets []*net.IPNet, gwNextHops []net return err } } - gw.nodePortWatcher, err = newNodePortWatcher(gwBridge, nodeName, gw.openflowManager, gw.nodeIPManager, watchFactory) + gw.nodePortWatcher, err = newNodePortWatcher(gwBridge, gw.openflowManager, gw.nodeIPManager, watchFactory) if err != nil { return err } diff --git a/go-controller/pkg/node/gateway_localnet_linux_test.go b/go-controller/pkg/node/gateway_localnet_linux_test.go index 195ce8465f..3260717199 100644 --- a/go-controller/pkg/node/gateway_localnet_linux_test.go +++ b/go-controller/pkg/node/gateway_localnet_linux_test.go @@ -49,13 +49,11 @@ func initFakeNodePortWatcher(iptV4, iptV6 util.IPTablesHelper) *nodePortWatcher Expect(err).NotTo(HaveOccurred()) fNPW := nodePortWatcher{ - ofportPhys: "eth0", - ofportPatch: "patch-breth0_ov", - gatewayIPv4: v4localnetGatewayIP, - gatewayIPv6: v6localnetGatewayIP, - nodeName: "mynode", - serviceInfo: make(map[k8stypes.NamespacedName]*serviceConfig), - egressServiceInfo: make(map[k8stypes.NamespacedName]*serviceEps), + ofportPhys: "eth0", + ofportPatch: "patch-breth0_ov", + gatewayIPv4: v4localnetGatewayIP, + gatewayIPv6: v6localnetGatewayIP, + serviceInfo: make(map[k8stypes.NamespacedName]*serviceConfig), ofm: &openflowManager{ flowCache: map[string][]string{}, }, @@ -139,6 +137,7 @@ func newService(name, namespace, ip string, ports []v1.ServicePort, serviceType ObjectMeta: newObjectMeta(name, namespace), Spec: v1.ServiceSpec{ ClusterIP: ip, + ClusterIPs: []string{ip}, Ports: ports, Type: serviceType, ExternalIPs: externalIPs, @@ -352,7 +351,7 @@ var _ = Describe("Node Operations", func() { "OVN-KUBE-SNAT-MGMTPORT": []string{}, "OVN-KUBE-ETP": []string{}, "OVN-KUBE-ITP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, + "OVN-KUBE-EGRESS-SVC": []string{}, }, "filter": {}, "mangle": { @@ -429,7 +428,7 @@ var _ = Describe("Node Operations", func() { "OVN-KUBE-SNAT-MGMTPORT": []string{}, "OVN-KUBE-ETP": []string{}, "OVN-KUBE-ITP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, + "OVN-KUBE-EGRESS-SVC": []string{}, }, "filter": {}, "mangle": { @@ -508,7 +507,7 @@ var _ = Describe("Node Operations", func() { 
"OVN-KUBE-SNAT-MGMTPORT": []string{}, "OVN-KUBE-ETP": []string{}, "OVN-KUBE-ITP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, + "OVN-KUBE-EGRESS-SVC": []string{}, }, "filter": {}, "mangle": { @@ -601,7 +600,7 @@ var _ = Describe("Node Operations", func() { fmt.Sprintf("-p %s -m addrtype --dst-type LOCAL --dport %v -j DNAT --to-destination %s:%v", service.Spec.Ports[0].Protocol, service.Spec.Ports[0].NodePort, types.V4HostETPLocalMasqueradeIP, service.Spec.Ports[0].NodePort), }, "OVN-KUBE-ITP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, + "OVN-KUBE-EGRESS-SVC": []string{}, }, "filter": {}, "mangle": { @@ -697,7 +696,7 @@ var _ = Describe("Node Operations", func() { "OVN-KUBE-SNAT-MGMTPORT": []string{}, "OVN-KUBE-ETP": []string{}, "OVN-KUBE-ITP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, + "OVN-KUBE-EGRESS-SVC": []string{}, }, "filter": {}, "mangle": { @@ -801,7 +800,7 @@ var _ = Describe("Node Operations", func() { fmt.Sprintf("-p %s -m addrtype --dst-type LOCAL --dport %v -j DNAT --to-destination %s:%v", service.Spec.Ports[0].Protocol, service.Spec.Ports[0].NodePort, types.V4HostETPLocalMasqueradeIP, service.Spec.Ports[0].NodePort), }, "OVN-KUBE-ITP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, + "OVN-KUBE-EGRESS-SVC": []string{}, }, "filter": {}, "mangle": { @@ -936,7 +935,7 @@ var _ = Describe("Node Operations", func() { fmt.Sprintf("-p %s -d %s --dport %v -j DNAT --to-destination %s:%d -m statistic --mode random --probability 1.0000000000", service.Spec.Ports[0].Protocol, externalIP, service.Spec.Ports[0].Port, ep2.Addresses[0], int32(service.Spec.Ports[0].TargetPort.IntValue())), }, "OVN-KUBE-ITP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, + "OVN-KUBE-EGRESS-SVC": []string{}, }, "filter": {}, "mangle": { @@ -1040,7 +1039,7 @@ var _ = Describe("Node Operations", func() { }, "OVN-KUBE-ETP": []string{}, "OVN-KUBE-ITP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, + "OVN-KUBE-EGRESS-SVC": []string{}, }, "filter": {}, "mangle": { @@ -1157,7 +1156,7 @@ var _ = Describe("Node Operations", func() { fmt.Sprintf("-p %s -d %s --dport %v -j DNAT --to-destination %s:%v", service.Spec.Ports[0].Protocol, externalIP, service.Spec.Ports[0].Port, types.V4HostETPLocalMasqueradeIP, service.Spec.Ports[0].NodePort), }, "OVN-KUBE-ITP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, + "OVN-KUBE-EGRESS-SVC": []string{}, }, "filter": {}, "mangle": { @@ -1261,7 +1260,7 @@ var _ = Describe("Node Operations", func() { "OVN-KUBE-SNAT-MGMTPORT": []string{}, "OVN-KUBE-ETP": []string{}, "OVN-KUBE-ITP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, + "OVN-KUBE-EGRESS-SVC": []string{}, }, "filter": {}, "mangle": { @@ -1358,7 +1357,7 @@ var _ = Describe("Node Operations", func() { "OVN-KUBE-SNAT-MGMTPORT": []string{}, "OVN-KUBE-ETP": []string{}, "OVN-KUBE-ITP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, + 
"OVN-KUBE-EGRESS-SVC": []string{}, }, "filter": {}, "mangle": { @@ -1450,7 +1449,7 @@ var _ = Describe("Node Operations", func() { "OVN-KUBE-SNAT-MGMTPORT": []string{}, "OVN-KUBE-ETP": []string{}, "OVN-KUBE-ITP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, + "OVN-KUBE-EGRESS-SVC": []string{}, }, "filter": {}, "mangle": { @@ -1531,7 +1530,7 @@ var _ = Describe("Node Operations", func() { "OVN-KUBE-SNAT-MGMTPORT": []string{}, "OVN-KUBE-ETP": []string{}, "OVN-KUBE-ITP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, + "OVN-KUBE-EGRESS-SVC": []string{}, }, "filter": {}, "mangle": { @@ -1619,7 +1618,7 @@ var _ = Describe("Node Operations", func() { "OVN-KUBE-SNAT-MGMTPORT": []string{}, "OVN-KUBE-ETP": []string{}, "OVN-KUBE-ITP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, + "OVN-KUBE-EGRESS-SVC": []string{}, }, "filter": {}, "mangle": { @@ -1658,7 +1657,7 @@ var _ = Describe("Node Operations", func() { "OVN-KUBE-SNAT-MGMTPORT": []string{}, "OVN-KUBE-ETP": []string{}, "OVN-KUBE-ITP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, + "OVN-KUBE-EGRESS-SVC": []string{}, }, "filter": {}, "mangle": { @@ -1741,7 +1740,7 @@ var _ = Describe("Node Operations", func() { "OVN-KUBE-NODEPORT": []string{}, "OVN-KUBE-ETP": []string{}, "OVN-KUBE-ITP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, + "OVN-KUBE-EGRESS-SVC": []string{}, }, "filter": {}, "mangle": { @@ -1847,7 +1846,7 @@ var _ = Describe("Node Operations", func() { "OVN-KUBE-SNAT-MGMTPORT": []string{}, "OVN-KUBE-ETP": []string{}, "OVN-KUBE-ITP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, + "OVN-KUBE-EGRESS-SVC": []string{}, }, "filter": {}, "mangle": { @@ -1886,7 +1885,7 @@ var _ = Describe("Node Operations", func() { "OVN-KUBE-SNAT-MGMTPORT": []string{}, "OVN-KUBE-ETP": []string{}, "OVN-KUBE-ITP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, + "OVN-KUBE-EGRESS-SVC": []string{}, }, "filter": {}, "mangle": { @@ -1980,7 +1979,7 @@ var _ = Describe("Node Operations", func() { fmt.Sprintf("-p %s -m addrtype --dst-type LOCAL --dport %v -j DNAT --to-destination %s:%v", service.Spec.Ports[0].Protocol, service.Spec.Ports[0].NodePort, types.V4HostETPLocalMasqueradeIP, service.Spec.Ports[0].NodePort), }, "OVN-KUBE-ITP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, + "OVN-KUBE-EGRESS-SVC": []string{}, }, "filter": {}, "mangle": { @@ -2021,7 +2020,7 @@ var _ = Describe("Node Operations", func() { "OVN-KUBE-SNAT-MGMTPORT": []string{}, "OVN-KUBE-ETP": []string{}, "OVN-KUBE-ITP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, + "OVN-KUBE-EGRESS-SVC": []string{}, }, "filter": {}, "mangle": { @@ -2117,7 +2116,7 @@ var _ = Describe("Node Operations", func() { }, "OVN-KUBE-ETP": []string{}, "OVN-KUBE-ITP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, + "OVN-KUBE-EGRESS-SVC": []string{}, 
}, "filter": {}, "mangle": { @@ -2163,7 +2162,7 @@ var _ = Describe("Node Operations", func() { "OVN-KUBE-SNAT-MGMTPORT": []string{}, "OVN-KUBE-ETP": []string{}, "OVN-KUBE-ITP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, + "OVN-KUBE-EGRESS-SVC": []string{}, }, "filter": {}, "mangle": { @@ -2263,7 +2262,7 @@ var _ = Describe("Node Operations", func() { "OVN-KUBE-SNAT-MGMTPORT": []string{}, "OVN-KUBE-ETP": []string{}, "OVN-KUBE-ITP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, + "OVN-KUBE-EGRESS-SVC": []string{}, }, "filter": {}, "mangle": { @@ -2310,7 +2309,7 @@ var _ = Describe("Node Operations", func() { "OVN-KUBE-SNAT-MGMTPORT": []string{}, "OVN-KUBE-ITP": []string{}, "OVN-KUBE-ETP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, + "OVN-KUBE-EGRESS-SVC": []string{}, }, "filter": {}, "mangle": { @@ -2405,7 +2404,7 @@ var _ = Describe("Node Operations", func() { }, "OVN-KUBE-ITP": []string{}, "OVN-KUBE-ETP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, + "OVN-KUBE-EGRESS-SVC": []string{}, }, "filter": {}, "mangle": { @@ -2453,7 +2452,7 @@ var _ = Describe("Node Operations", func() { }, "OVN-KUBE-SNAT-MGMTPORT": []string{}, "OVN-KUBE-ETP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, + "OVN-KUBE-EGRESS-SVC": []string{}, }, "filter": {}, "mangle": { @@ -2552,7 +2551,7 @@ var _ = Describe("Node Operations", func() { fmt.Sprintf("-p %s -d %s --dport %d -j REDIRECT --to-port %d", service.Spec.Ports[0].Protocol, service.Spec.ClusterIP, service.Spec.Ports[0].Port, int32(service.Spec.Ports[0].TargetPort.IntValue())), }, "OVN-KUBE-ETP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, + "OVN-KUBE-EGRESS-SVC": []string{}, }, "filter": {}, "mangle": { @@ -2599,7 +2598,7 @@ var _ = Describe("Node Operations", func() { "OVN-KUBE-SNAT-MGMTPORT": []string{}, "OVN-KUBE-ITP": []string{}, "OVN-KUBE-ETP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, + "OVN-KUBE-EGRESS-SVC": []string{}, }, "filter": {}, "mangle": { @@ -2622,165 +2621,5 @@ var _ = Describe("Node Operations", func() { err := app.Run([]string{app.Name}) Expect(err).NotTo(HaveOccurred()) }) - - It("manages iptables rules for LoadBalancer egress service backed by ovn-k pods", func() { - app.Action = func(ctx *cli.Context) error { - config.Gateway.Mode = config.GatewayModeShared - _, cidr4, _ := net.ParseCIDR("10.128.0.0/16") - config.Default.ClusterSubnets = []config.CIDRNetworkEntry{{CIDR: cidr4, HostSubnetLength: 24}} - fakeOvnNode.fakeExec.AddFakeCmd(&ovntest.ExpectedCmd{ - Cmd: "ovs-ofctl show ", - Err: fmt.Errorf("deliberate error to fall back to output:LOCAL"), - }) - fakeOvnNode.fakeExec.AddFakeCmd(&ovntest.ExpectedCmd{ - Cmd: "ovs-ofctl show ", - Err: fmt.Errorf("deliberate error to fall back to output:LOCAL"), - }) - epPortName := "https" - epPortValue := int32(443) - - service := *newService("service1", "namespace1", "10.129.0.2", - []v1.ServicePort{ - { - NodePort: int32(31111), - Protocol: v1.ProtocolTCP, - Port: int32(8080), - }, - }, - v1.ServiceTypeLoadBalancer, - []string{}, - 
v1.ServiceStatus{ - LoadBalancer: v1.LoadBalancerStatus{ - Ingress: []v1.LoadBalancerIngress{{ - IP: "5.5.5.5", - }}, - }, - }, - false, false, - ) - service.Annotations[util.EgressSVCHostAnnotation] = "mynode" - service.Annotations[util.EgressSVCAnnotation] = "{}" - - ep1 := discovery.Endpoint{ - Addresses: []string{"10.128.0.3"}, - } - epPort := discovery.EndpointPort{ - Name: &epPortName, - Port: &epPortValue, - } - - // host-networked endpoint, should not have an SNAT rule created - ep2 := discovery.Endpoint{ - Addresses: []string{"192.168.18.15"}, - NodeName: &fakeNodeName, - } - // endpointSlice.Endpoints is ovn-networked so this will - // come under !hasLocalHostNetEp case - endpointSlice := *newEndpointSlice( - "service1", - "namespace1", - []discovery.Endpoint{ep1, ep2}, - []discovery.EndpointPort{epPort}) - - fakeOvnNode.start(ctx, - &v1.ServiceList{ - Items: []v1.Service{ - service, - }, - }, - &endpointSlice, - ) - - fNPW.watchFactory = fakeOvnNode.watcher - Expect(startNodePortWatcher(fNPW, fakeOvnNode.fakeClient, &fakeMgmtPortConfig)).To(Succeed()) - err := fNPW.AddService(&service) - Expect(err).NotTo(HaveOccurred()) - - expectedTables := map[string]util.FakeTable{ - "nat": { - "PREROUTING": []string{ - "-j OVN-KUBE-ETP", - "-j OVN-KUBE-EXTERNALIP", - "-j OVN-KUBE-NODEPORT", - }, - "OUTPUT": []string{ - "-j OVN-KUBE-EXTERNALIP", - "-j OVN-KUBE-NODEPORT", - "-j OVN-KUBE-ITP", - }, - "POSTROUTING": []string{ - "-j OVN-KUBE-EGRESS-SVC", - }, - "OVN-KUBE-NODEPORT": []string{ - fmt.Sprintf("-p %s -m addrtype --dst-type LOCAL --dport %v -j DNAT --to-destination %s:%v", service.Spec.Ports[0].Protocol, service.Spec.Ports[0].NodePort, service.Spec.ClusterIP, service.Spec.Ports[0].Port), - }, - "OVN-KUBE-SNAT-MGMTPORT": []string{}, - "OVN-KUBE-EXTERNALIP": []string{ - fmt.Sprintf("-p %s -d %s --dport %v -j DNAT --to-destination %s:%v", service.Spec.Ports[0].Protocol, service.Status.LoadBalancer.Ingress[0].IP, service.Spec.Ports[0].Port, service.Spec.ClusterIP, service.Spec.Ports[0].Port), - }, - "OVN-KUBE-ETP": []string{}, - "OVN-KUBE-ITP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{ - "-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN", - "-s 10.128.0.3 -m comment --comment namespace1/service1 -j SNAT --to-source 5.5.5.5", - }, - }, - "filter": {}, - "mangle": { - "OUTPUT": []string{ - "-j OVN-KUBE-ITP", - }, - "OVN-KUBE-ITP": []string{}, - }, - } - - f4 := iptV4.(*util.FakeIPTables) - err = f4.MatchState(expectedTables) - Expect(err).NotTo(HaveOccurred()) - - addConntrackMocks(netlinkMock, []ctFilterDesc{{"5.5.5.5", 8080}, {"10.129.0.2", 8080}, {"192.168.18.15", 31111}}) - err = fNPW.DeleteService(&service) - Expect(err).NotTo(HaveOccurred()) - - expectedTables = map[string]util.FakeTable{ - "nat": { - "OVN-KUBE-EXTERNALIP": []string{}, - "OVN-KUBE-NODEPORT": []string{}, - "OVN-KUBE-ITP": []string{}, - "PREROUTING": []string{ - "-j OVN-KUBE-ETP", - "-j OVN-KUBE-EXTERNALIP", - "-j OVN-KUBE-NODEPORT", - }, - "OUTPUT": []string{ - "-j OVN-KUBE-EXTERNALIP", - "-j OVN-KUBE-NODEPORT", - "-j OVN-KUBE-ITP", - }, - "POSTROUTING": []string{ - "-j OVN-KUBE-EGRESS-SVC", - }, - "OVN-KUBE-SNAT-MGMTPORT": []string{}, - "OVN-KUBE-ETP": []string{}, - "OVN-KUBE-EGRESS-SVC": []string{"-m mark --mark 0x3f0 -m comment --comment Do not SNAT to SVC VIP -j RETURN"}, - }, - "filter": {}, - "mangle": { - "OUTPUT": []string{ - "-j OVN-KUBE-ITP", - }, - "OVN-KUBE-ITP": []string{}, - }, - } - - f4 = iptV4.(*util.FakeIPTables) - err = f4.MatchState(expectedTables) - 
Expect(err).NotTo(HaveOccurred()) - - return nil - } - err := app.Run([]string{app.Name}) - Expect(err).NotTo(HaveOccurred()) - }) }) }) diff --git a/go-controller/pkg/node/gateway_shared_intf.go b/go-controller/pkg/node/gateway_shared_intf.go index b2220a0e36..c2aaea2fed 100644 --- a/go-controller/pkg/node/gateway_shared_intf.go +++ b/go-controller/pkg/node/gateway_shared_intf.go @@ -11,6 +11,8 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/controllers/egressservice" + nodeipt "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/iptables" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" "github.com/vishvananda/netlink" @@ -77,15 +79,12 @@ type nodePortWatcher struct { ofportPhys string ofportPatch string gwBridge string - nodeName string // Map of service name to programmed iptables/OF rules - serviceInfo map[ktypes.NamespacedName]*serviceConfig - serviceInfoLock sync.Mutex - egressServiceInfo map[ktypes.NamespacedName]*serviceEps - egressServiceInfoLock sync.Mutex - ofm *openflowManager - nodeIPManager *addressManager - watchFactory factory.NodeWatchFactory + serviceInfo map[ktypes.NamespacedName]*serviceConfig + serviceInfoLock sync.Mutex + ofm *openflowManager + nodeIPManager *addressManager + watchFactory factory.NodeWatchFactory } type serviceConfig struct { @@ -97,11 +96,6 @@ type serviceConfig struct { localEndpoints sets.Set[string] } -type serviceEps struct { - v4 sets.Set[string] - v6 sets.Set[string] -} - type cidrAndFlags struct { ipNet *net.IPNet flags int @@ -462,9 +456,6 @@ func addServiceRules(service *kapi.Service, localEndpoints []string, svcHasLocal if err = addGatewayIptRules(service, localEndpoints, svcHasLocalHostNetEndPnt); err != nil { errors = append(errors, err) } - if err = updateEgressSVCIptRules(service, npw); err != nil { - errors = append(errors, err) - } } } else { // For Host Only Mode @@ -521,9 +512,6 @@ func delServiceRules(service *kapi.Service, localEndpoints []string, npw *nodePo if err = delGatewayIptRules(service, localEndpoints, false); err != nil { errors = append(errors, fmt.Errorf("error updating service flow cache: %v", err)) } - if err = delAllEgressSVCIptRules(service, npw); err != nil { - errors = append(errors, fmt.Errorf("error updating service flow cache: %v", err)) - } } } else { @@ -547,7 +535,6 @@ func serviceUpdateNotNeeded(old, new *kapi.Service) bool { reflect.DeepEqual(new.Spec.ExternalTrafficPolicy, old.Spec.ExternalTrafficPolicy) && (new.Spec.InternalTrafficPolicy != nil && old.Spec.InternalTrafficPolicy != nil && reflect.DeepEqual(*new.Spec.InternalTrafficPolicy, *old.Spec.InternalTrafficPolicy)) && - !util.EgressSVCHostChanged(old, new) && (new.Spec.AllocateLoadBalancerNodePorts != nil && old.Spec.AllocateLoadBalancerNodePorts != nil && reflect.DeepEqual(*new.Spec.AllocateLoadBalancerNodePorts, *old.Spec.AllocateLoadBalancerNodePorts)) } @@ -598,7 +585,7 @@ func (npw *nodePortWatcher) UpdateService(old, new *kapi.Service) error { if serviceUpdateNotNeeded(old, new) { klog.V(5).Infof("Skipping service update for: %s as change does not apply to any of .Spec.Ports, "+ ".Spec.ExternalIP, .Spec.ClusterIP, .Spec.ClusterIPs, .Spec.Type, .Status.LoadBalancer.Ingress, "+ - ".Spec.ExternalTrafficPolicy, .Spec.InternalTrafficPolicy, Egress service host", 
new.Name) + ".Spec.ExternalTrafficPolicy, .Spec.InternalTrafficPolicy", new.Name) return nil } // Update the service in svcConfig if we need to so that other handler @@ -706,7 +693,7 @@ func (npw *nodePortWatcher) DeleteService(service *kapi.Service) error { func (npw *nodePortWatcher) SyncServices(services []interface{}) error { var err error var errors []error - keepIPTRules := []iptRule{} + keepIPTRules := []nodeipt.Rule{} for _, serviceInterface := range services { name := ktypes.NamespacedName{Namespace: serviceInterface.(*kapi.Service).Namespace, Name: serviceInterface.(*kapi.Service).Name} @@ -742,46 +729,14 @@ func (npw *nodePortWatcher) SyncServices(services []interface{}) error { if !npw.dpuMode { keepIPTRules = append(keepIPTRules, getGatewayIPTRules(service, sets.List(localEndpoints), hasLocalHostNetworkEp)...) } - - if !npw.dpuMode && shouldConfigureEgressSVC(service, npw) { - v4Eps := sets.New[string]() - v6Eps := sets.New[string]() - - for _, epSlice := range epSlices { - if epSlice.AddressType == discovery.AddressTypeFQDN { - continue - } - epsToInsert := v4Eps - if epSlice.AddressType == discovery.AddressTypeIPv6 { - epsToInsert = v6Eps - } - - for _, ep := range epSlice.Endpoints { - for _, ip := range ep.Addresses { - ipStr := utilnet.ParseIPSloppy(ip).String() - if !util.IsHostEndpoint(ipStr) { - epsToInsert.Insert(ipStr) - } - } - } - } - - keepIPTRules = append(keepIPTRules, egressSVCIPTRulesForEndpoints(service, v4Eps.UnsortedList(), v6Eps.UnsortedList())...) - - npw.egressServiceInfoLock.Lock() - npw.egressServiceInfo[name] = &serviceEps{v4: v4Eps, v6: v6Eps} - npw.egressServiceInfoLock.Unlock() - } - } - if !npw.dpuMode { - keepIPTRules = append(keepIPTRules, egressSVCIPTDefaultReturnRule()) } + // sync OF rules once npw.ofm.requestFlowSync() // sync IPtables rules once only for Full mode if !npw.dpuMode { // (NOTE: Order is important, add jump to iptableETPChain before jump to NP/EIP chains) - for _, chain := range []string{iptableITPChain, iptableESVCChain, iptableNodePortChain, iptableExternalIPChain, iptableETPChain, iptableMgmPortChain} { + for _, chain := range []string{iptableITPChain, egressservice.Chain, iptableNodePortChain, iptableExternalIPChain, iptableETPChain, iptableMgmPortChain} { if err = recreateIPTRules("nat", chain, keepIPTRules); err != nil { errors = append(errors, err) } @@ -850,15 +805,6 @@ func (npw *nodePortWatcher) AddEndpointSlice(epSlice *discovery.EndpointSlice) e } return apierrors.NewAggregate(errors) } - - // Call this in case it wasn't already called by addServiceRules - npw.egressServiceInfoLock.Lock() - _, found := npw.egressServiceInfo[namespacedName] - npw.egressServiceInfoLock.Unlock() - if found && !npw.dpuMode { - return updateEgressSVCIptRules(svc, npw) - } - return nil } @@ -991,15 +937,6 @@ func (npw *nodePortWatcher) UpdateEndpointSlice(oldEpSlice, newEpSlice *discover return apierrors.NewAggregate(errors) } - // Call this in case it wasn't already called by addServiceRules - npw.egressServiceInfoLock.Lock() - _, found := npw.egressServiceInfo[namespacedName] - npw.egressServiceInfoLock.Unlock() - if found && !npw.dpuMode && svc != nil { - if err = updateEgressSVCIptRules(svc, npw); err != nil { - errors = append(errors, err) - } - } return apierrors.NewAggregate(errors) } @@ -1020,7 +957,7 @@ func (npwipt *nodePortWatcherIptables) UpdateService(old, new *kapi.Service) err if serviceUpdateNotNeeded(old, new) { klog.V(5).Infof("Skipping service update for: %s as change does not apply to "+ "any of .Spec.Ports, 
.Spec.ExternalIP, .Spec.ClusterIP, .Spec.ClusterIPs,"+ - " .Spec.Type, .Status.LoadBalancer.Ingress, Egress service annotations", new.Name) + " .Spec.Type, .Status.LoadBalancer.Ingress", new.Name) return nil } @@ -1056,7 +993,7 @@ func (npwipt *nodePortWatcherIptables) DeleteService(service *kapi.Service) erro func (npwipt *nodePortWatcherIptables) SyncServices(services []interface{}) error { var err error var errors []error - keepIPTRules := []iptRule{} + keepIPTRules := []nodeipt.Rule{} for _, serviceInterface := range services { service, ok := serviceInterface.(*kapi.Service) if !ok { @@ -1785,7 +1722,7 @@ func newSharedGateway(nodeName string, subnets []*net.IPNet, gwNextHops []net.IP } } klog.Info("Creating Shared Gateway Node Port Watcher") - gw.nodePortWatcher, err = newNodePortWatcher(gwBridge, nodeName, gw.openflowManager, gw.nodeIPManager, watchFactory) + gw.nodePortWatcher, err = newNodePortWatcher(gwBridge, gw.openflowManager, gw.nodeIPManager, watchFactory) if err != nil { return err } @@ -1805,7 +1742,7 @@ func newSharedGateway(nodeName string, subnets []*net.IPNet, gwNextHops []net.IP return gw, nil } -func newNodePortWatcher(gwBridge *bridgeConfiguration, nodeName string, ofm *openflowManager, +func newNodePortWatcher(gwBridge *bridgeConfiguration, ofm *openflowManager, nodeIPManager *addressManager, watchFactory factory.NodeWatchFactory) (*nodePortWatcher, error) { // Get ofport of patchPort ofportPatch, stderr, err := util.GetOVSOfPort("--if-exists", "get", @@ -1862,18 +1799,16 @@ func newNodePortWatcher(gwBridge *bridgeConfiguration, nodeName string, ofm *ope gatewayIPv4, gatewayIPv6 := getGatewayFamilyAddrs(gwBridge.ips) npw := &nodePortWatcher{ - dpuMode: dpuMode, - gatewayIPv4: gatewayIPv4, - gatewayIPv6: gatewayIPv6, - ofportPhys: ofportPhys, - ofportPatch: ofportPatch, - gwBridge: gwBridge.bridgeName, - nodeName: nodeName, - serviceInfo: make(map[ktypes.NamespacedName]*serviceConfig), - egressServiceInfo: make(map[ktypes.NamespacedName]*serviceEps), - nodeIPManager: nodeIPManager, - ofm: ofm, - watchFactory: watchFactory, + dpuMode: dpuMode, + gatewayIPv4: gatewayIPv4, + gatewayIPv6: gatewayIPv6, + ofportPhys: ofportPhys, + ofportPatch: ofportPatch, + gwBridge: gwBridge.bridgeName, + serviceInfo: make(map[ktypes.NamespacedName]*serviceConfig), + nodeIPManager: nodeIPManager, + ofm: ofm, + watchFactory: watchFactory, } return npw, nil } diff --git a/go-controller/pkg/node/gateway_shared_intf_linux.go b/go-controller/pkg/node/gateway_shared_intf_linux.go index 1ac2a847fb..8237e993ec 100644 --- a/go-controller/pkg/node/gateway_shared_intf_linux.go +++ b/go-controller/pkg/node/gateway_shared_intf_linux.go @@ -7,15 +7,12 @@ import ( "fmt" "strings" + nodeipt "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/iptables" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" kapi "k8s.io/api/core/v1" - v1 "k8s.io/api/discovery/v1" - ktypes "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/sets" "k8s.io/klog/v2" - utilnet "k8s.io/utils/net" ) // deletes the local bridge used for DGP and removes the corresponding iface, as well as OVS bridge mappings @@ -73,115 +70,8 @@ func addGatewayIptRules(service *kapi.Service, localEndpoints []string, svcHasLo func delGatewayIptRules(service *kapi.Service, localEndpoints []string, svcHasLocalHostNetEndPnt bool) error { rules := getGatewayIPTRules(service, localEndpoints, svcHasLocalHostNetEndPnt) - if err := delIptRules(rules); err != nil { + if err := 
nodeipt.DelRules(rules); err != nil { return fmt.Errorf("failed to delete iptables rules for service %s/%s: %v", service.Namespace, service.Name, err) } return nil } - -func updateEgressSVCIptRules(svc *kapi.Service, npw *nodePortWatcher) error { - if !shouldConfigureEgressSVC(svc, npw) { - return nil - } - - npw.egressServiceInfoLock.Lock() - defer npw.egressServiceInfoLock.Unlock() - - key := ktypes.NamespacedName{Namespace: svc.Namespace, Name: svc.Name} - cachedEps := npw.egressServiceInfo[key] - if cachedEps == nil { - cachedEps = &serviceEps{sets.New[string](), sets.New[string]()} - npw.egressServiceInfo[key] = cachedEps - } - - epSlices, err := npw.watchFactory.GetEndpointSlices(svc.Namespace, svc.Name) - if err != nil { - return fmt.Errorf("failed to get endpointslices for egress service %s/%s during update: %v", - svc.Namespace, svc.Name, err) - } - - v4Eps := sets.New[string]() // All current v4 eps - v6Eps := sets.New[string]() // All current v6 eps - for _, epSlice := range epSlices { - if epSlice.AddressType == v1.AddressTypeFQDN { - continue - } - epsToInsert := v4Eps - if epSlice.AddressType == v1.AddressTypeIPv6 { - epsToInsert = v6Eps - } - - for _, ep := range epSlice.Endpoints { - for _, ip := range ep.Addresses { - ipStr := utilnet.ParseIPSloppy(ip).String() - if !util.IsHostEndpoint(ipStr) { - epsToInsert.Insert(ipStr) - } - } - } - } - - v4ToAdd := v4Eps.Difference(cachedEps.v4).UnsortedList() - v6ToAdd := v6Eps.Difference(cachedEps.v6).UnsortedList() - v4ToDelete := cachedEps.v4.Difference(v4Eps).UnsortedList() - v6ToDelete := cachedEps.v6.Difference(v6Eps).UnsortedList() - - // Add rules for endpoints without one. - addRules := egressSVCIPTRulesForEndpoints(svc, v4ToAdd, v6ToAdd) - if err := appendIptRules(addRules); err != nil { - return fmt.Errorf("failed to add iptables rules for service %s/%s during update: %v", - svc.Namespace, svc.Name, err) - } - - // Update the cache with the added endpoints. - cachedEps.v4.Insert(v4ToAdd...) - cachedEps.v6.Insert(v6ToAdd...) - - // Delete rules for endpoints that should not have one. - delRules := egressSVCIPTRulesForEndpoints(svc, v4ToDelete, v6ToDelete) - if err := delIptRules(delRules); err != nil { - return fmt.Errorf("failed to delete iptables rules for service %s/%s during update: %v", - svc.Namespace, svc.Name, err) - } - - // Update the cache with the deleted endpoints. - cachedEps.v4.Delete(v4ToDelete...) - cachedEps.v6.Delete(v6ToDelete...) 
- return nil -} - -func delAllEgressSVCIptRules(svc *kapi.Service, npw *nodePortWatcher) error { - npw.egressServiceInfoLock.Lock() - defer npw.egressServiceInfoLock.Unlock() - key := ktypes.NamespacedName{Namespace: svc.Namespace, Name: svc.Name} - allEps, found := npw.egressServiceInfo[key] - if !found { - return nil - } - - v4ToDelete := make([]string, len(allEps.v4)) - v6ToDelete := make([]string, len(allEps.v6)) - for addr := range allEps.v4 { - v4ToDelete = append(v4ToDelete, addr) - } - for addr := range allEps.v6 { - v6ToDelete = append(v6ToDelete, addr) - } - - delRules := egressSVCIPTRulesForEndpoints(svc, v4ToDelete, v6ToDelete) - if err := delIptRules(delRules); err != nil { - return fmt.Errorf("failed to delete iptables rules for service %s/%s: %v", svc.Namespace, svc.Name, err) - } - - delete(npw.egressServiceInfo, key) - return nil -} - -func shouldConfigureEgressSVC(svc *kapi.Service, npw *nodePortWatcher) bool { - svcHost, _ := util.GetEgressSVCHost(svc) - - return util.HasEgressSVCAnnotation(svc) && - svcHost == npw.nodeName && - svc.Spec.Type == kapi.ServiceTypeLoadBalancer && - len(svc.Status.LoadBalancer.Ingress) > 0 -} diff --git a/go-controller/pkg/node/iptables/iptables.go b/go-controller/pkg/node/iptables/iptables.go new file mode 100644 index 0000000000..56be67e64c --- /dev/null +++ b/go-controller/pkg/node/iptables/iptables.go @@ -0,0 +1,82 @@ +package iptables + +import ( + "strings" + + "github.com/coreos/go-iptables/iptables" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + "github.com/pkg/errors" + "k8s.io/klog/v2" +) + +// Rule represents an iptables rule. +type Rule struct { + Table string + Chain string + Args []string + Protocol iptables.Protocol +} + +// AddRules adds the given rules to iptables. +func AddRules(rules []Rule, append bool) error { + addErrors := errors.New("") + var err error + var ipt util.IPTablesHelper + var exists bool + for _, r := range rules { + klog.V(5).Infof("Adding rule in table: %s, chain: %s with args: \"%s\" for protocol: %v ", + r.Table, r.Chain, strings.Join(r.Args, " "), r.Protocol) + if ipt, err = util.GetIPTablesHelper(r.Protocol); err != nil { + addErrors = errors.Wrapf(addErrors, + "Failed to add iptables %s/%s rule %q: %v", r.Table, r.Chain, strings.Join(r.Args, " "), err) + continue + } + if err = ipt.NewChain(r.Table, r.Chain); err != nil { + klog.V(5).Infof("Chain: \"%s\" in table: \"%s\" already exists, skipping creation: %v", + r.Chain, r.Table, err) + } + exists, err = ipt.Exists(r.Table, r.Chain, r.Args...) + if !exists && err == nil { + if append { + err = ipt.Append(r.Table, r.Chain, r.Args...) + } else { + err = ipt.Insert(r.Table, r.Chain, 1, r.Args...) + } + } + if err != nil { + addErrors = errors.Wrapf(addErrors, "failed to add iptables %s/%s rule %q: %v", + r.Table, r.Chain, strings.Join(r.Args, " "), err) + } + } + if addErrors.Error() == "" { + addErrors = nil + } + return addErrors +} + +// DelRules deletes the given rules from iptables. 
+func DelRules(rules []Rule) error { + delErrors := errors.New("") + var err error + var ipt util.IPTablesHelper + for _, r := range rules { + klog.V(5).Infof("Deleting rule in table: %s, chain: %s with args: \"%s\" for protocol: %v ", + r.Table, r.Chain, strings.Join(r.Args, " "), r.Protocol) + if ipt, err = util.GetIPTablesHelper(r.Protocol); err != nil { + delErrors = errors.Wrapf(delErrors, + "Failed to delete iptables %s/%s rule %q: %v", r.Table, r.Chain, strings.Join(r.Args, " "), err) + continue + } + if exists, err := ipt.Exists(r.Table, r.Chain, r.Args...); err == nil && exists { + err := ipt.Delete(r.Table, r.Chain, r.Args...) + if err != nil { + delErrors = errors.Wrapf(delErrors, "failed to delete iptables %s/%s rule %q: %v", + r.Table, r.Chain, strings.Join(r.Args, " "), err) + } + } + } + if delErrors.Error() == "" { + delErrors = nil + } + return delErrors +} diff --git a/go-controller/pkg/node/management-port_linux_test.go b/go-controller/pkg/node/management-port_linux_test.go index c13a95a512..94b0021209 100644 --- a/go-controller/pkg/node/management-port_linux_test.go +++ b/go-controller/pkg/node/management-port_linux_test.go @@ -25,6 +25,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" egressfirewallfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1/apis/clientset/versioned/fake" egressipv1fake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1/apis/clientset/versioned/fake" + egressservicefake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/fake" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" mocks "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/mocks/github.com/vishvananda/netlink" @@ -263,7 +264,7 @@ func testManagementPort(ctx *cli.Context, fexec *ovntest.FakeExec, testNS ns.Net _, err = config.InitConfig(ctx, fexec, nil) Expect(err).NotTo(HaveOccurred()) - nodeAnnotator := kube.NewNodeAnnotator(&kube.KubeOVN{Kube: kube.Kube{KClient: fakeClient}, EIPClient: egressipv1fake.NewSimpleClientset(), EgressFirewallClient: &egressfirewallfake.Clientset{}}, existingNode.Name) + nodeAnnotator := kube.NewNodeAnnotator(&kube.KubeOVN{Kube: kube.Kube{KClient: fakeClient}, EIPClient: egressipv1fake.NewSimpleClientset(), EgressFirewallClient: &egressfirewallfake.Clientset{}, EgressServiceClient: &egressservicefake.Clientset{}}, existingNode.Name) waiter := newStartupWaiter() mgmtPorts := NewManagementPorts(nodeName, nodeSubnetCIDRs) wg := &sync.WaitGroup{} @@ -352,7 +353,7 @@ func testManagementPortDPU(ctx *cli.Context, fexec *ovntest.FakeExec, testNS ns. 
_, err = config.InitConfig(ctx, fexec, nil) Expect(err).NotTo(HaveOccurred()) - nodeAnnotator := kube.NewNodeAnnotator(&kube.KubeOVN{Kube: kube.Kube{KClient: fakeClient}, EIPClient: egressipv1fake.NewSimpleClientset(), EgressFirewallClient: &egressfirewallfake.Clientset{}}, existingNode.Name) + nodeAnnotator := kube.NewNodeAnnotator(&kube.KubeOVN{Kube: kube.Kube{KClient: fakeClient}, EIPClient: egressipv1fake.NewSimpleClientset(), EgressFirewallClient: &egressfirewallfake.Clientset{}, EgressServiceClient: &egressservicefake.Clientset{}}, existingNode.Name) waiter := newStartupWaiter() wg := &sync.WaitGroup{} rm := newRouteManager(wg, true, 10*time.Second) diff --git a/go-controller/pkg/node/ovn_test.go b/go-controller/pkg/node/ovn_test.go index 666df463d2..1b94c0674a 100644 --- a/go-controller/pkg/node/ovn_test.go +++ b/go-controller/pkg/node/ovn_test.go @@ -6,6 +6,8 @@ import ( . "github.com/onsi/gomega" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + egressserviceapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1" + egressservicefake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/fake" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" util "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -38,15 +40,22 @@ func NewFakeOVNNode(fexec *ovntest.FakeExec) *FakeOVNNode { } func (o *FakeOVNNode) start(ctx *cli.Context, objects ...runtime.Object) { + egressServiceObjects := []runtime.Object{} v1Objects := []runtime.Object{} for _, object := range objects { - v1Objects = append(v1Objects, object) + if _, isEgressServiceObject := object.(*egressserviceapi.EgressServiceList); isEgressServiceObject { + egressServiceObjects = append(egressServiceObjects, object) + } else { + v1Objects = append(v1Objects, object) + } } + _, err := config.InitConfig(ctx, o.fakeExec, nil) Expect(err).NotTo(HaveOccurred()) o.fakeClient = &util.OVNNodeClientset{ - KubeClient: fake.NewSimpleClientset(v1Objects...), + KubeClient: fake.NewSimpleClientset(v1Objects...), + EgressServiceClient: egressservicefake.NewSimpleClientset(egressServiceObjects...), } o.init() // initializes the node } diff --git a/go-controller/pkg/util/kube.go b/go-controller/pkg/util/kube.go index f531ec11b0..511e968843 100644 --- a/go-controller/pkg/util/kube.go +++ b/go-controller/pkg/util/kube.go @@ -62,7 +62,8 @@ type OVNMasterClientset struct { } type OVNNodeClientset struct { - KubeClient kubernetes.Interface + KubeClient kubernetes.Interface + EgressServiceClient egressserviceclientset.Interface } type OVNClusterManagerClientset struct { @@ -78,6 +79,7 @@ func (cs *OVNClientset) GetMasterClientset() *OVNMasterClientset { CloudNetworkClient: cs.CloudNetworkClient, EgressQoSClient: cs.EgressQoSClient, MultiNetworkPolicyClient: cs.MultiNetworkPolicyClient, + EgressServiceClient: cs.EgressServiceClient, } } @@ -90,7 +92,8 @@ func (cs *OVNClientset) GetClusterManagerClientset() *OVNClusterManagerClientset func (cs *OVNClientset) GetNodeClientset() *OVNNodeClientset { return &OVNNodeClientset{ - KubeClient: cs.KubeClient, + KubeClient: cs.KubeClient, + EgressServiceClient: cs.EgressServiceClient, } } From 0e6f94b9426cebba5f3dbf65cb41ac8798036732 Mon Sep 17 00:00:00 2001 From: Ori Braunshtein Date: Sun, 12 Feb 2023 14:59:45 +0200 Subject: [PATCH 31/90] node-tests: Lock FakeIPTables Adding a Lock for modifying the map representing the FakeIPTables. 
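Roughly, the pattern is to embed a sync.Mutex in FakeIPTables and take it in every
method that touches the tables map. A minimal sketch of the change in the diff below,
using Append as a representative method (illustrative only, not the full diff):

    type FakeIPTables struct {
        proto  iptables.Protocol
        tables map[string]*FakeTable
        sync.Mutex
    }

    // Append appends rulespec to the specified table/chain, now serializing
    // access to the fake tables map so concurrent callers do not race.
    func (f *FakeIPTables) Append(tableName, chainName string, rulespec ...string) error {
        f.Lock()
        defer f.Unlock()
        // ... existing append logic, unchanged, now protected by the lock ...
        return nil
    }
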
This is needed to avoid race conditions for controllers that react to object events and calling the FakeIPTables. Signed-off-by: Ori Braunshtein --- go-controller/pkg/util/iptables.go | 48 +++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/go-controller/pkg/util/iptables.go b/go-controller/pkg/util/iptables.go index 0cf231eb3a..bd753d0e43 100644 --- a/go-controller/pkg/util/iptables.go +++ b/go-controller/pkg/util/iptables.go @@ -6,6 +6,7 @@ package util import ( "fmt" "strings" + "sync" "github.com/coreos/go-iptables/iptables" ) @@ -80,6 +81,7 @@ func (t *FakeTable) getChain(chainName string) ([]string, error) { type FakeIPTables struct { proto iptables.Protocol tables map[string]*FakeTable + sync.Mutex } // SetFakeIPTablesHelpers populates `helpers` with FakeIPTablesHelper that can be used in unit tests @@ -111,8 +113,23 @@ func (f *FakeIPTables) getTable(tableName string) (*FakeTable, error) { return table, nil } +func (f *FakeIPTables) newChain(tableName, chainName string) error { + table, err := f.getTable(tableName) + if err != nil { + return err + } + if _, err := table.getChain(chainName); err == nil { + // existing chain returns an error + return err + } + (*table)[chainName] = nil + return nil +} + // List rules in specified table/chain func (f *FakeIPTables) List(tableName, chainName string) ([]string, error) { + f.Lock() + defer f.Unlock() table, err := f.getTable(tableName) if err != nil { return nil, err @@ -126,6 +143,8 @@ func (f *FakeIPTables) List(tableName, chainName string) ([]string, error) { // ListChains returns the names of all chains in the table func (f *FakeIPTables) ListChains(tableName string) ([]string, error) { + f.Lock() + defer f.Unlock() table, ok := f.tables[tableName] if !ok { return nil, fmt.Errorf("table does not exist") @@ -139,21 +158,16 @@ func (f *FakeIPTables) ListChains(tableName string) ([]string, error) { // NewChain creates a new chain in the specified table func (f *FakeIPTables) NewChain(tableName, chainName string) error { - table, err := f.getTable(tableName) - if err != nil { - return err - } - if _, err := table.getChain(chainName); err == nil { - // existing chain returns an error - return err - } - (*table)[chainName] = nil - return nil + f.Lock() + defer f.Unlock() + return f.newChain(tableName, chainName) } // ClearChain removes all rules in the specified table/chain. // If the chain does not exist, a new one will be created func (f *FakeIPTables) ClearChain(tableName, chainName string) error { + f.Lock() + defer f.Unlock() table, err := f.getTable(tableName) if err != nil { return err @@ -163,12 +177,14 @@ func (f *FakeIPTables) ClearChain(tableName, chainName string) error { (*table)[chainName] = nil return nil } - return f.NewChain(tableName, chainName) + return f.newChain(tableName, chainName) } // DeleteChain deletes the chain in the specified table. 
// The chain must be empty func (f *FakeIPTables) DeleteChain(tableName, chainName string) error { + f.Lock() + defer f.Unlock() table, err := f.getTable(tableName) if err != nil { return err @@ -186,6 +202,8 @@ func (f *FakeIPTables) DeleteChain(tableName, chainName string) error { // Exists checks if given rulespec in specified table/chain exists func (f *FakeIPTables) Exists(tableName, chainName string, rulespec ...string) (bool, error) { + f.Lock() + defer f.Unlock() table, err := f.getTable(tableName) if err != nil { return false, err @@ -205,6 +223,8 @@ func (f *FakeIPTables) Exists(tableName, chainName string, rulespec ...string) ( // Insert inserts a rule into the specified table/chain func (f *FakeIPTables) Insert(tableName, chainName string, pos int, rulespec ...string) error { + f.Lock() + defer f.Unlock() table, err := f.getTable(tableName) if err != nil { return err @@ -225,6 +245,8 @@ func (f *FakeIPTables) Insert(tableName, chainName string, pos int, rulespec ... // Append appends rulespec to specified table/chain func (f *FakeIPTables) Append(tableName, chainName string, rulespec ...string) error { + f.Lock() + defer f.Unlock() table, err := f.getTable(tableName) if err != nil { return err @@ -240,6 +262,8 @@ func (f *FakeIPTables) Append(tableName, chainName string, rulespec ...string) e // Delete removes a rule from the specified table/chain func (f *FakeIPTables) Delete(tableName, chainName string, rulespec ...string) error { + f.Lock() + defer f.Unlock() table, err := f.getTable(tableName) if err != nil { return err @@ -261,6 +285,8 @@ func (f *FakeIPTables) Delete(tableName, chainName string, rulespec ...string) e // MatchState matches the expected state against the actual rules // code under test added to iptables func (f *FakeIPTables) MatchState(tables map[string]FakeTable) error { + f.Lock() + defer f.Unlock() if len(tables) != len(f.tables) { return fmt.Errorf("expected %d tables, got %d", len(tables), len(f.tables)) } From 1b7dd16ba0794b02ee4262f93b5df4a2420dc590 Mon Sep 17 00:00:00 2001 From: Ori Braunshtein Date: Tue, 17 Jan 2023 10:42:56 +0200 Subject: [PATCH 32/90] Update manifests for EgressService Signed-off-by: Ori Braunshtein --- contrib/kind.sh | 2 + dist/images/daemonset.sh | 11 ++ dist/images/ovnkube.sh | 23 ++++ .../k8s.ovn.org_egressservices.yaml.j2 | 110 ++++++++++++++++++ dist/templates/ovn-setup.yaml.j2 | 2 + dist/templates/ovnkube-cm-ncm.yaml.j2 | 4 + dist/templates/ovnkube-master.yaml.j2 | 2 + dist/templates/ovnkube-node.yaml.j2 | 2 + 8 files changed, 156 insertions(+) create mode 100644 dist/templates/k8s.ovn.org_egressservices.yaml.j2 diff --git a/contrib/kind.sh b/contrib/kind.sh index c99fbceb91..ed2d5b2d32 100755 --- a/contrib/kind.sh +++ b/contrib/kind.sh @@ -803,6 +803,7 @@ create_ovn_kube_manifests() { --egress-ip-healthcheck-port="${OVN_EGRESSIP_HEALTHCHECK_PORT}" \ --egress-firewall-enable=true \ --egress-qos-enable=true \ + --egress-service-enable=true \ --v4-join-subnet="${JOIN_SUBNET_IPV4}" \ --v6-join-subnet="${JOIN_SUBNET_IPV6}" \ --ex-gw-network-interface="${OVN_EX_GW_NETWORK_INTERFACE}" \ @@ -834,6 +835,7 @@ install_ovn() { run_kubectl apply -f k8s.ovn.org_egressfirewalls.yaml run_kubectl apply -f k8s.ovn.org_egressips.yaml run_kubectl apply -f k8s.ovn.org_egressqoses.yaml + run_kubectl apply -f k8s.ovn.org_egressservices.yaml run_kubectl apply -f ovn-setup.yaml MASTER_NODES=$(kind get nodes --name "${KIND_CLUSTER_NAME}" | sort | head -n "${KIND_NUM_MASTER}") # We want OVN HA not Kubernetes HA diff --git 
a/dist/images/daemonset.sh b/dist/images/daemonset.sh index d78b48d953..a6a1f7894f 100755 --- a/dist/images/daemonset.sh +++ b/dist/images/daemonset.sh @@ -64,6 +64,7 @@ OVN_EGRESSIP_ENABLE= OVN_EGRESSIP_HEALTHCHECK_PORT= OVN_EGRESSFIREWALL_ENABLE= OVN_EGRESSQOS_ENABLE= +OVN_EGRESSSERVICE_ENABLE= OVN_DISABLE_OVN_IFACE_ID_VER="false" OVN_MULTI_NETWORK_ENABLE= OVN_V4_JOIN_SUBNET="" @@ -236,6 +237,9 @@ while [ "$1" != "" ]; do --multi-network-enable) OVN_MULTI_NETWORK_ENABLE=$VALUE ;; + --egress-service-enable) + OVN_EGRESSSERVICE_ENABLE=$VALUE + ;; --v4-join-subnet) OVN_V4_JOIN_SUBNET=$VALUE ;; @@ -363,6 +367,8 @@ ovn_egress_firewall_enable=${OVN_EGRESSFIREWALL_ENABLE} echo "ovn_egress_firewall_enable: ${ovn_egress_firewall_enable}" ovn_egress_qos_enable=${OVN_EGRESSQOS_ENABLE} echo "ovn_egress_qos_enable: ${ovn_egress_qos_enable}" +ovn_egress_service_enable=${OVN_EGRESSSERVICE_ENABLE} +echo "ovn_egress_service_enable: ${ovn_egress_service_enable}" ovn_disable_ovn_iface_id_ver=${OVN_DISABLE_OVN_IFACE_ID_VER} echo "ovn_disable_ovn_iface_id_ver: ${ovn_disable_ovn_iface_id_ver}" ovn_multi_network_enable=${OVN_MULTI_NETWORK_ENABLE} @@ -460,6 +466,7 @@ ovn_image=${ovnkube_image} \ ovn_egress_ip_enable=${ovn_egress_ip_enable} \ ovn_egress_ip_healthcheck_port=${ovn_egress_ip_healthcheck_port} \ ovn_multi_network_enable=${ovn_multi_network_enable} \ + ovn_egress_service_enable=${ovn_egress_service_enable} \ ovn_ssl_en=${ovn_ssl_en} \ ovn_remote_probe_interval=${ovn_remote_probe_interval} \ ovn_monitor_all=${ovn_monitor_all} \ @@ -503,6 +510,7 @@ ovn_image=${image} \ ovn_multicast_enable=${ovn_multicast_enable} \ ovn_egress_ip_enable=${ovn_egress_ip_enable} \ ovn_egress_ip_healthcheck_port=${ovn_egress_ip_healthcheck_port} \ + ovn_egress_service_enable=${ovn_egress_service_enable} \ ovn_netflow_targets=${ovn_netflow_targets} \ ovn_sflow_targets=${ovn_sflow_targets} \ ovn_ipfix_targets=${ovn_ipfix_targets} \ @@ -538,6 +546,7 @@ ovn_image=${ovnkube_image} \ ovn_egress_firewall_enable=${ovn_egress_firewall_enable} \ ovn_egress_qos_enable=${ovn_egress_qos_enable} \ ovn_multi_network_enable=${ovn_multi_network_enable} \ + ovn_egress_service_enable=${ovn_egress_service_enable} \ ovn_ssl_en=${ovn_ssl_en} \ ovn_master_count=${ovn_master_count} \ ovn_gateway_mode=${ovn_gateway_mode} \ @@ -569,6 +578,7 @@ ovn_image=${image} \ ovn_egress_firewall_enable=${ovn_egress_firewall_enable} \ ovn_egress_qos_enable=${ovn_egress_qos_enable} \ ovn_multi_network_enable=${ovn_multi_network_enable} \ + ovn_egress_service_enable=${ovn_egress_service_enable} \ ovn_ssl_en=${ovn_ssl_en} \ ovn_master_count=${ovn_master_count} \ ovn_gateway_mode=${ovn_gateway_mode} \ @@ -638,5 +648,6 @@ cp ../templates/ovnkube-monitor.yaml.j2 ${output_dir}/ovnkube-monitor.yaml cp ../templates/k8s.ovn.org_egressfirewalls.yaml.j2 ${output_dir}/k8s.ovn.org_egressfirewalls.yaml cp ../templates/k8s.ovn.org_egressips.yaml.j2 ${output_dir}/k8s.ovn.org_egressips.yaml cp ../templates/k8s.ovn.org_egressqoses.yaml.j2 ${output_dir}/k8s.ovn.org_egressqoses.yaml +cp ../templates/k8s.ovn.org_egressservices.yaml.j2 ${output_dir}/k8s.ovn.org_egressservices.yaml exit 0 diff --git a/dist/images/ovnkube.sh b/dist/images/ovnkube.sh index 0be068614c..46c85f70a4 100755 --- a/dist/images/ovnkube.sh +++ b/dist/images/ovnkube.sh @@ -76,6 +76,7 @@ fi # OVN_EGRESSIP_HEALTHCHECK_PORT - egress IP node check to use grpc on this port (0 ==> dial to port 9 instead) # OVN_EGRESSFIREWALL_ENABLE - enable egressFirewall for ovn-kubernetes # OVN_EGRESSQOS_ENABLE - enable egress QoS 
for ovn-kubernetes +# OVN_EGRESSSERVICE_ENABLE - enable egress Service for ovn-kubernetes # OVN_UNPRIVILEGED_MODE - execute CNI ovs/netns commands from host (default no) # OVNKUBE_NODE_MODE - ovnkube node mode of operation, one of: full, dpu, dpu-host (default: full) # OVNKUBE_NODE_MGMT_PORT_NETDEV - ovnkube node management port netdev. @@ -221,6 +222,8 @@ ovn_egress_ip_healthcheck_port=${OVN_EGRESSIP_HEALTHCHECK_PORT:-9107} ovn_egressfirewall_enable=${OVN_EGRESSFIREWALL_ENABLE:-false} #OVN_EGRESSQOS_ENABLE - enable egress QoS for ovn-kubernetes ovn_egressqos_enable=${OVN_EGRESSQOS_ENABLE:-false} +#OVN_EGRESSSERVICE_ENABLE - enable egress Service for ovn-kubernetes +ovn_egressservice_enable=${OVN_EGRESSSERVICE_ENABLE:-false} #OVN_DISABLE_OVN_IFACE_ID_VER - disable usage of the OVN iface-id-ver option ovn_disable_ovn_iface_id_ver=${OVN_DISABLE_OVN_IFACE_ID_VER:-false} #OVN_MULTI_NETWORK_ENABLE - enable multiple network support for ovn-kubernetes @@ -997,6 +1000,12 @@ ovn-master() { fi echo "multi_network_enabled_flag=${multi_network_enabled_flag}" + egressservice_enabled_flag= + if [[ ${ovn_egressservice_enable} == "true" ]]; then + egressservice_enabled_flag="--enable-egress-service" + fi + echo "egressservice_enabled_flag=${egressservice_enabled_flag}" + ovnkube_master_metrics_bind_address="${metrics_endpoint_ip}:9409" local ovnkube_metrics_tls_opts="" if [[ ${OVNKUBE_METRICS_PK} != "" && ${OVNKUBE_METRICS_CERT} != "" ]]; then @@ -1059,6 +1068,7 @@ ovn-master() { ${egressip_healthcheck_port_flag} \ ${egressfirewall_enabled_flag} \ ${egressqos_enabled_flag} \ + ${egressservice_enabled_flag} \ ${ovnkube_config_duration_enable_flag} \ ${ovnkube_metrics_scale_enable_flag} \ ${multi_network_enabled_flag} \ @@ -1189,6 +1199,12 @@ ovn-network-controller-manager() { fi echo "multi_network_enabled_flag=${multi_network_enabled_flag}" + egressservice_enabled_flag= + if [[ ${ovn_egressservice_enable} == "true" ]]; then + egressservice_enabled_flag="--enable-egress-service" + fi + echo "egressservice_enabled_flag=${egressservice_enabled_flag}" + ovnkube_master_metrics_bind_address="${metrics_endpoint_ip}:9409" echo "ovnkube_master_metrics_bind_address=${ovnkube_master_metrics_bind_address}" @@ -1232,6 +1248,7 @@ ovn-network-controller-manager() { ${egressip_healthcheck_port_flag} \ ${egressfirewall_enabled_flag} \ ${egressqos_enabled_flag} \ + ${egressservice_enabled_flag} \ ${ovnkube_config_duration_enable_flag} \ ${multi_network_enabled_flag} \ --metrics-bind-address ${ovnkube_master_metrics_bind_address} \ @@ -1429,6 +1446,11 @@ ovn-node() { egressip_healthcheck_port_flag="--egressip-node-healthcheck-port=${ovn_egress_ip_healthcheck_port}" fi + egressservice_enabled_flag= + if [[ ${ovn_egressservice_enable} == "true" ]]; then + egressservice_enabled_flag="--enable-egress-service" + fi + disable_ovn_iface_id_ver_flag= if [[ ${ovn_disable_ovn_iface_id_ver} == "true" ]]; then disable_ovn_iface_id_ver_flag="--disable-ovn-iface-id-ver" @@ -1592,6 +1614,7 @@ ovn-node() { ${multicast_enabled_flag} \ ${egressip_enabled_flag} \ ${egressip_healthcheck_port_flag} \ + ${egressservice_enabled_flag} \ ${disable_ovn_iface_id_ver_flag} \ ${multi_network_enabled_flag} \ ${netflow_targets} \ diff --git a/dist/templates/k8s.ovn.org_egressservices.yaml.j2 b/dist/templates/k8s.ovn.org_egressservices.yaml.j2 new file mode 100644 index 0000000000..4d79ec6907 --- /dev/null +++ b/dist/templates/k8s.ovn.org_egressservices.yaml.j2 @@ -0,0 +1,110 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition 
+metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.11.3 + creationTimestamp: null + name: egressservices.k8s.ovn.org +spec: + group: k8s.ovn.org + names: + kind: EgressService + listKind: EgressServiceList + plural: egressservices + singular: egressservice + scope: Namespaced + versions: + - name: v1 + schema: + openAPIV3Schema: + description: EgressService is a CRD that allows the user to request that the + source IP of egress packets originating from all of the pods that are endpoints + of a given LoadBalancer Service would be its ingress IP. + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: EgressServiceSpec defines the desired state of EgressService + properties: + network: + description: The network which this service should send egress and + corresponding ingress replies to. This is typically implemented + as VRF mapping, representing a numeric id or string name of a routing + table which by omission uses the default host routing. + type: string + nodeSelector: + description: Allows limiting the nodes that can be selected to handle + the service's traffic. When present only a node whose labels match + the specified selectors can be selected for handling the service's + traffic. When it is not specified any node in the cluster can be + chosen to manage the service's traffic. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. + The requirements are ANDed. + items: + description: A label selector requirement is a selector that + contains values, a key, and an operator that relates the key + and values. + properties: + key: + description: key is the label key that the selector applies + to. + type: string + operator: + description: operator represents a key's relationship to + a set of values. Valid operators are In, NotIn, Exists + and DoesNotExist. + type: string + values: + description: values is an array of string values. If the + operator is In or NotIn, the values array must be non-empty. + If the operator is Exists or DoesNotExist, the values + array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A single + {key,value} in the matchLabels map is equivalent to an element + of matchExpressions, whose key field is "key", the operator + is "In", and the values array contains only "value". The requirements + are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + type: object + status: + description: EgressServiceStatus defines the observed state of EgressService + properties: + host: + description: The name of the node selected to handle the service's + traffic. 
+ type: string + required: + - host + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/dist/templates/ovn-setup.yaml.j2 b/dist/templates/ovn-setup.yaml.j2 index a43c3176c3..1319ee5b4b 100644 --- a/dist/templates/ovn-setup.yaml.j2 +++ b/dist/templates/ovn-setup.yaml.j2 @@ -85,6 +85,8 @@ rules: - egressfirewalls - egressips - egressqoses + - egressservices + - egressservices/status verbs: ["list", "get", "watch", "update", "patch"] - apiGroups: - apiextensions.k8s.io diff --git a/dist/templates/ovnkube-cm-ncm.yaml.j2 b/dist/templates/ovnkube-cm-ncm.yaml.j2 index 14d5430248..bd974fc59b 100644 --- a/dist/templates/ovnkube-cm-ncm.yaml.j2 +++ b/dist/templates/ovnkube-cm-ncm.yaml.j2 @@ -204,6 +204,8 @@ spec: value: "{{ ovn_egress_qos_enable }}" - name: OVN_MULTI_NETWORK_ENABLE value: "{{ ovn_multi_network_enable }}" + - name: OVN_EGRESSSERVICE_ENABLE + value: "{{ ovn_egress_service_enable }}" - name: OVN_HYBRID_OVERLAY_NET_CIDR value: "{{ ovn_hybrid_overlay_net_cidr }}" - name: OVN_DISABLE_SNAT_MULTIPLE_GWS @@ -304,6 +306,8 @@ spec: value: "{{ ovn_egress_qos_enable }}" - name: OVN_MULTI_NETWORK_ENABLE value: "{{ ovn_multi_network_enable }}" + - name: OVN_EGRESSSERVICE_ENABLE + value: "{{ ovn_egress_service_enable }}" - name: OVN_HYBRID_OVERLAY_NET_CIDR value: "{{ ovn_hybrid_overlay_net_cidr }}" - name: OVN_DISABLE_SNAT_MULTIPLE_GWS diff --git a/dist/templates/ovnkube-master.yaml.j2 b/dist/templates/ovnkube-master.yaml.j2 index dea09ac1c7..59d47d1f10 100644 --- a/dist/templates/ovnkube-master.yaml.j2 +++ b/dist/templates/ovnkube-master.yaml.j2 @@ -241,6 +241,8 @@ spec: value: "{{ ovn_egress_qos_enable }}" - name: OVN_MULTI_NETWORK_ENABLE value: "{{ ovn_multi_network_enable }}" + - name: OVN_EGRESSSERVICE_ENABLE + value: "{{ ovn_egress_service_enable }}" - name: OVN_HYBRID_OVERLAY_NET_CIDR value: "{{ ovn_hybrid_overlay_net_cidr }}" - name: OVN_DISABLE_SNAT_MULTIPLE_GWS diff --git a/dist/templates/ovnkube-node.yaml.j2 b/dist/templates/ovnkube-node.yaml.j2 index 34b7baae47..660d883c7a 100644 --- a/dist/templates/ovnkube-node.yaml.j2 +++ b/dist/templates/ovnkube-node.yaml.j2 @@ -155,6 +155,8 @@ spec: value: "{{ ovn_egress_ip_enable }}" - name: OVN_EGRESSIP_HEALTHCHECK_PORT value: "{{ ovn_egress_ip_healthcheck_port }}" + - name: OVN_EGRESSSERVICE_ENABLE + value: "{{ ovn_egress_service_enable }}" - name: OVN_HYBRID_OVERLAY_NET_CIDR value: "{{ ovn_hybrid_overlay_net_cidr }}" - name: OVN_DISABLE_SNAT_MULTIPLE_GWS From 5322b5ceaae598c6eddf79e6860aad9a3c3f7939 Mon Sep 17 00:00:00 2001 From: Ori Braunshtein Date: Tue, 17 Jan 2023 10:44:01 +0200 Subject: [PATCH 33/90] Update EgressService E2E Signed-off-by: Ori Braunshtein --- test/e2e/egress_services.go | 262 +++++++++++++++++++++++++++--------- 1 file changed, 198 insertions(+), 64 deletions(-) diff --git a/test/e2e/egress_services.go b/test/e2e/egress_services.go index c1fcf2eaab..5e1afebd35 100644 --- a/test/e2e/egress_services.go +++ b/test/e2e/egress_services.go @@ -2,8 +2,8 @@ package e2e import ( "context" + "encoding/json" "fmt" - "io/ioutil" "net" "os" "strings" @@ -23,6 +23,7 @@ import ( var _ = ginkgo.Describe("Egress Services", func() { const ( + egressServiceYAML = "egress_service.yaml" externalContainerName = "external-container-for-egress-service" podHTTPPort = 8080 serviceName = "test-egress-service" @@ -69,7 +70,9 @@ var _ = ginkgo.Describe("Egress Services", func() { i := i podsCreateSync.Go(func() error { p, err := createGenericPodWithLabel(f, name, nodes[i].Name, f.Namespace.Name, 
command, podsLabels) - framework.Logf("%s podIPs are: %v", p.Name, p.Status.PodIPs) + if p != nil { + framework.Logf("%s podIPs are: %v", p.Name, p.Status.PodIPs) + } return err }) } @@ -78,8 +81,24 @@ var _ = ginkgo.Describe("Egress Services", func() { framework.ExpectNoError(err, "failed to create backend pods") ginkgo.By("Creating an egress service without node selectors") - svc := createLBServiceWithIngressIP(f.ClientSet, f.Namespace.Name, serviceName, protocol, - map[string]string{"k8s.ovn.org/egress-service": "{}"}, podsLabels, podHTTPPort) + egressServiceConfig := fmt.Sprintf(` +apiVersion: k8s.ovn.org/v1 +kind: EgressService +metadata: + name: ` + serviceName + ` + namespace: ` + f.Namespace.Name + ` +`) + + if err := os.WriteFile(egressServiceYAML, []byte(egressServiceConfig), 0644); err != nil { + framework.Failf("Unable to write CRD config to disk: %v", err) + } + defer func() { + if err := os.Remove(egressServiceYAML); err != nil { + framework.Logf("Unable to remove the CRD config from disk: %v", err) + } + }() + framework.RunKubectlOrDie(f.Namespace.Name, "create", "-f", egressServiceYAML) + svc := createLBServiceWithIngressIP(f.ClientSet, f.Namespace.Name, serviceName, protocol, podsLabels, podHTTPPort) svcIP := svc.Status.LoadBalancer.Ingress[0].IP ginkgo.By("Getting the IPs of the node in charge of the service") @@ -108,12 +127,8 @@ var _ = ginkgo.Describe("Egress Services", func() { // This is to be sure we did not break ingress traffic for the service reachAllServiceBackendsFromExternalContainer(externalContainerName, svcIP, podHTTPPort, pods) - ginkgo.By("Resetting the service's annotations the backend pods should exit with their node's IP") - svc, err = f.ClientSet.CoreV1().Services(f.Namespace.Name).Get(context.TODO(), serviceName, metav1.GetOptions{}) - framework.ExpectNoError(err, "failed to get service") - svc.Annotations = map[string]string{} - _, err = f.ClientSet.CoreV1().Services(f.Namespace.Name).Update(context.TODO(), svc, metav1.UpdateOptions{}) - framework.ExpectNoError(err, "failed to reset service's annotations") + ginkgo.By("Deleting the EgressService the backend pods should exit with their node's IP") + framework.RunKubectlOrDie(f.Namespace.Name, "delete", "-f", egressServiceYAML) for i, pod := range pods { node := &nodes[i] @@ -156,8 +171,24 @@ var _ = ginkgo.Describe("Egress Services", func() { framework.ExpectNoError(err, "failed to create backend pods") ginkgo.By("Creating an egress service without node selectors") - _ = createLBServiceWithIngressIP(f.ClientSet, f.Namespace.Name, serviceName, protocol, - map[string]string{"k8s.ovn.org/egress-service": "{}"}, podsLabels, podHTTPPort) + egressServiceConfig := fmt.Sprintf(` +apiVersion: k8s.ovn.org/v1 +kind: EgressService +metadata: + name: ` + serviceName + ` + namespace: ` + f.Namespace.Name + ` +`) + + if err := os.WriteFile(egressServiceYAML, []byte(egressServiceConfig), 0644); err != nil { + framework.Failf("Unable to write CRD config to disk: %v", err) + } + defer func() { + if err := os.Remove(egressServiceYAML); err != nil { + framework.Logf("Unable to remove the CRD config from disk: %v", err) + } + }() + framework.RunKubectlOrDie(f.Namespace.Name, "create", "-f", egressServiceYAML) + _ = createLBServiceWithIngressIP(f.ClientSet, f.Namespace.Name, serviceName, protocol, podsLabels, podHTTPPort) ginkgo.By("Getting the IPs of the node in charge of the service") egressHost, _, _ := getEgressSVCHost(f.ClientSet, f.Namespace.Name, serviceName) @@ -224,8 +255,28 @@ var _ = ginkgo.Describe("Egress 
Services", func() { ginkgo.By("Creating an egress service selecting the first node") firstNode := nodes[0].Name - svc := createLBServiceWithIngressIP(f.ClientSet, f.Namespace.Name, serviceName, protocol, - map[string]string{"k8s.ovn.org/egress-service": fmt.Sprintf("{\"nodeSelector\":{\"matchLabels\":{\"kubernetes.io/hostname\": \"%s\"}}}", firstNode)}, podsLabels, podHTTPPort) + egressServiceConfig := fmt.Sprintf(` +apiVersion: k8s.ovn.org/v1 +kind: EgressService +metadata: + name: ` + serviceName + ` + namespace: ` + f.Namespace.Name + ` +spec: + nodeSelector: + matchLabels: + kubernetes.io/hostname: ` + firstNode + ` +`) + + if err := os.WriteFile(egressServiceYAML, []byte(egressServiceConfig), 0644); err != nil { + framework.Failf("Unable to write CRD config to disk: %v", err) + } + defer func() { + if err := os.Remove(egressServiceYAML); err != nil { + framework.Logf("Unable to remove the CRD config from disk: %v", err) + } + }() + framework.RunKubectlOrDie(f.Namespace.Name, "create", "-f", egressServiceYAML) + svc := createLBServiceWithIngressIP(f.ClientSet, f.Namespace.Name, serviceName, protocol, podsLabels, podHTTPPort) svcIP := svc.Status.LoadBalancer.Ingress[0].IP ginkgo.By("Verifying the first node was picked for handling the service's egress traffic") @@ -255,18 +306,28 @@ var _ = ginkgo.Describe("Egress Services", func() { // This is to be sure we did not break ingress traffic for the service reachAllServiceBackendsFromExternalContainer(externalContainerName, svcIP, podHTTPPort, pods) - ginkgo.By("Updating the service to select the second node") + ginkgo.By("Updating the egress service to select the second node") secondNode := nodes[1].Name - svc, err = f.ClientSet.CoreV1().Services(f.Namespace.Name).Get(context.TODO(), serviceName, metav1.GetOptions{}) - framework.ExpectNoError(err, "failed to get service") - svc.Annotations = map[string]string{"k8s.ovn.org/egress-service": fmt.Sprintf("{\"nodeSelector\":{\"matchLabels\":{\"kubernetes.io/hostname\": \"%s\"}}}", secondNode)} - _, err = f.ClientSet.CoreV1().Services(f.Namespace.Name).Update(context.TODO(), svc, metav1.UpdateOptions{}) - framework.ExpectNoError(err, "failed to update service's annotations") + egressServiceConfig = fmt.Sprintf(` +apiVersion: k8s.ovn.org/v1 +kind: EgressService +metadata: + name: ` + serviceName + ` + namespace: ` + f.Namespace.Name + ` +spec: + nodeSelector: + matchLabels: + kubernetes.io/hostname: ` + secondNode + ` +`) + if err := os.WriteFile(egressServiceYAML, []byte(egressServiceConfig), 0644); err != nil { + framework.Failf("Unable to write CRD config to disk: %v", err) + } + framework.RunKubectlOrDie(f.Namespace.Name, "apply", "-f", egressServiceYAML) ginkgo.By("Verifying the second node now handles the service's egress traffic") - node, egressHostV4IP, egressHostV6IP = getEgressSVCHost(f.ClientSet, svc.Namespace, svc.Name) + node, egressHostV4IP, egressHostV6IP = getEgressSVCHost(f.ClientSet, f.Namespace.Name, serviceName) framework.ExpectEqual(node.Name, secondNode, "the wrong node got selected for egress service") - nodeList, err := f.ClientSet.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{LabelSelector: fmt.Sprintf("egress-service.k8s.ovn.org/%s-%s=", svc.Namespace, svc.Name)}) + nodeList, err := f.ClientSet.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{LabelSelector: fmt.Sprintf("egress-service.k8s.ovn.org/%s-%s=", f.Namespace.Name, serviceName)}) framework.ExpectNoError(err, "failed to list nodes") framework.ExpectEqual(len(nodeList.Items), 1, 
fmt.Sprintf("expected only one node labeled for the service, got %v", nodeList.Items)) @@ -292,14 +353,25 @@ var _ = ginkgo.Describe("Egress Services", func() { ginkgo.By("Verifying the external container can reach all of the service's backend pods") reachAllServiceBackendsFromExternalContainer(externalContainerName, svcIP, podHTTPPort, pods) - ginkgo.By("Updating the service to select no node") - svc, err = f.ClientSet.CoreV1().Services(f.Namespace.Name).Get(context.TODO(), serviceName, metav1.GetOptions{}) - framework.ExpectNoError(err, "failed to get service") - svc.Annotations = map[string]string{"k8s.ovn.org/egress-service": "{\"nodeSelector\":{\"matchLabels\":{\"perfect\": \"match\"}}}"} - _, err = f.ClientSet.CoreV1().Services(f.Namespace.Name).Update(context.TODO(), svc, metav1.UpdateOptions{}) - framework.ExpectNoError(err, "failed to update service's annotations") + ginkgo.By("Updating the egress service selector to match no node") + egressServiceConfig = fmt.Sprintf(` +apiVersion: k8s.ovn.org/v1 +kind: EgressService +metadata: + name: ` + serviceName + ` + namespace: ` + f.Namespace.Name + ` +spec: + nodeSelector: + matchLabels: + perfect: match +`) + if err := os.WriteFile(egressServiceYAML, []byte(egressServiceConfig), 0644); err != nil { + framework.Failf("Unable to write CRD config to disk: %v", err) + } + framework.RunKubectlOrDie(f.Namespace.Name, "apply", "-f", egressServiceYAML) + gomega.Eventually(func() error { - nodeList, err := f.ClientSet.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{LabelSelector: fmt.Sprintf("egress-service.k8s.ovn.org/%s-%s=", svc.Namespace, svc.Name)}) + nodeList, err := f.ClientSet.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{LabelSelector: fmt.Sprintf("egress-service.k8s.ovn.org/%s-%s=", f.Namespace.Name, serviceName)}) if err != nil { return err } @@ -347,9 +419,9 @@ var _ = ginkgo.Describe("Egress Services", func() { }() ginkgo.By("Verifying the third node now handles the service's egress traffic") - node, egressHostV4IP, egressHostV6IP = getEgressSVCHost(f.ClientSet, svc.Namespace, svc.Name) + node, egressHostV4IP, egressHostV6IP = getEgressSVCHost(f.ClientSet, f.Namespace.Name, serviceName) framework.ExpectEqual(node.Name, thirdNode, "the wrong node got selected for egress service") - nodeList, err = f.ClientSet.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{LabelSelector: fmt.Sprintf("egress-service.k8s.ovn.org/%s-%s=", svc.Namespace, svc.Name)}) + nodeList, err = f.ClientSet.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{LabelSelector: fmt.Sprintf("egress-service.k8s.ovn.org/%s-%s=", f.Namespace.Name, serviceName)}) framework.ExpectNoError(err, "failed to list nodes") framework.ExpectEqual(len(nodeList.Items), 1, fmt.Sprintf("expected only one node labeled for the service, got %v", nodeList.Items)) @@ -388,7 +460,9 @@ var _ = ginkgo.Describe("Egress Services", func() { i := i podsCreateSync.Go(func() error { p, err := createGenericPodWithLabel(f, name, nodes[i].Name, f.Namespace.Name, command, labels) - framework.Logf("%s podIPs are: %v", p.Name, p.Status.PodIPs) + if p != nil { + framework.Logf("%s podIPs are: %v", p.Name, p.Status.PodIPs) + } return err }) } @@ -397,8 +471,24 @@ var _ = ginkgo.Describe("Egress Services", func() { framework.ExpectNoError(err, "failed to create backend pods") ginkgo.By("Creating an egress service without node selectors") - svc := createLBServiceWithIngressIP(f.ClientSet, f.Namespace.Name, serviceName, protocol, - map[string]string{"k8s.ovn.org/egress-service": 
"{}"}, labels, podHTTPPort) + egressServiceConfig := fmt.Sprintf(` +apiVersion: k8s.ovn.org/v1 +kind: EgressService +metadata: + name: ` + serviceName + ` + namespace: ` + f.Namespace.Name + ` +`) + + if err := os.WriteFile(egressServiceYAML, []byte(egressServiceConfig), 0644); err != nil { + framework.Failf("Unable to write CRD config to disk: %v", err) + } + defer func() { + if err := os.Remove(egressServiceYAML); err != nil { + framework.Logf("Unable to remove the CRD config from disk: %v", err) + } + }() + framework.RunKubectlOrDie(f.Namespace.Name, "create", "-f", egressServiceYAML) + svc := createLBServiceWithIngressIP(f.ClientSet, f.Namespace.Name, serviceName, protocol, labels, podHTTPPort) svcIP := svc.Status.LoadBalancer.Ingress[0].IP ginkgo.By("Getting the IPs of the node in charge of the service") @@ -439,7 +529,7 @@ spec: kubernetes.io/metadata.name: ` + f.Namespace.Name + ` `) - if err := ioutil.WriteFile(egressIPYaml, []byte(egressIPConfig), 0644); err != nil { + if err := os.WriteFile(egressIPYaml, []byte(egressIPConfig), 0644); err != nil { framework.Failf("Unable to write CRD config to disk: %v", err) } defer func() { @@ -474,12 +564,9 @@ spec: // This is to be sure we did not break ingress traffic for the service reachAllServiceBackendsFromExternalContainer(externalContainerName, svcIP, podHTTPPort, pods) - ginkgo.By("Resetting the service's annotations the backend pods should exit with the EgressIP") - svc, err = f.ClientSet.CoreV1().Services(f.Namespace.Name).Get(context.TODO(), serviceName, metav1.GetOptions{}) - framework.ExpectNoError(err, "failed to get service") - svc.Annotations = map[string]string{} - _, err = f.ClientSet.CoreV1().Services(f.Namespace.Name).Update(context.TODO(), svc, metav1.UpdateOptions{}) - framework.ExpectNoError(err, "failed to reset service's annotations") + ginkgo.By("Deleting the EgressService the backend pods should exit with the EgressIP") + framework.RunKubectlOrDie(f.Namespace.Name, "delete", "-f", egressServiceYAML) + for _, pod := range pods { gomega.Eventually(func() error { return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, egressIP.String(), *dstIP, podHTTPPort) @@ -505,7 +592,9 @@ spec: name := name podsCreateSync.Go(func() error { p, err := createGenericPodWithLabel(f, name, secondNode, f.Namespace.Name, command, podsLabels) - framework.Logf("%s podIPs are: %v", p.Name, p.Status.PodIPs) + if p != nil { + framework.Logf("%s podIPs are: %v", p.Name, p.Status.PodIPs) + } return err }) } @@ -514,9 +603,29 @@ spec: framework.ExpectNoError(err, "failed to create backend pods") ginkgo.By("Creating an ETP=Local egress service selecting the first node") - svc := createLBServiceWithIngressIP(f.ClientSet, f.Namespace.Name, serviceName, protocol, - map[string]string{"k8s.ovn.org/egress-service": fmt.Sprintf("{\"nodeSelector\":{\"matchLabels\":{\"kubernetes.io/hostname\": \"%s\"}}}", firstNode)}, - podsLabels, podHTTPPort, func(svc *v1.Service) { + egressServiceConfig := fmt.Sprintf(` +apiVersion: k8s.ovn.org/v1 +kind: EgressService +metadata: + name: ` + serviceName + ` + namespace: ` + f.Namespace.Name + ` +spec: + nodeSelector: + matchLabels: + kubernetes.io/hostname: ` + firstNode + ` +`) + + if err := os.WriteFile(egressServiceYAML, []byte(egressServiceConfig), 0644); err != nil { + framework.Failf("Unable to write CRD config to disk: %v", err) + } + defer func() { + if err := os.Remove(egressServiceYAML); err != nil { + framework.Logf("Unable to remove the CRD config from disk: %v", err) + } + }() + 
framework.RunKubectlOrDie(f.Namespace.Name, "create", "-f", egressServiceYAML) + svc := createLBServiceWithIngressIP(f.ClientSet, f.Namespace.Name, serviceName, protocol, podsLabels, podHTTPPort, + func(svc *v1.Service) { svc.Spec.ExternalTrafficPolicy = v1.ServiceExternalTrafficPolicyTypeLocal }) svcIP := svc.Status.LoadBalancer.Ingress[0].IP @@ -530,14 +639,12 @@ spec: return fmt.Errorf("expected no nodes to be labeled for the service, got %v", nodeList.Items) } - svc, err := f.ClientSet.CoreV1().Services(f.Namespace.Name).Get(context.TODO(), serviceName, metav1.GetOptions{}) + status, err := getEgressServiceStatus(f.Namespace.Name, serviceName) if err != nil { return err } - - _, found := svc.Annotations["k8s.ovn.org/egress-service-host"] - if found { - return fmt.Errorf("expected no egress-service-host annotation on service, got: %v", svc.Annotations) + if status.Host != "" { + return fmt.Errorf("expected no host for egress service %s/%s got: %v", f.Namespace.Name, serviceName, status.Host) } return nil @@ -545,7 +652,9 @@ spec: ginkgo.By("Creating the third backend pod on the first node") p, err := createGenericPodWithLabel(f, pods[2], firstNode, f.Namespace.Name, command, podsLabels) - framework.Logf("%s podIPs are: %v", p.Name, p.Status.PodIPs) + if p != nil { + framework.Logf("%s podIPs are: %v", p.Name, p.Status.PodIPs) + } framework.ExpectNoError(err) ginkgo.By("Verifying the first node was selected for the service") @@ -584,14 +693,12 @@ spec: return fmt.Errorf("expected no nodes to be labeled for the service, got %v", nodeList.Items) } - svc, err := f.ClientSet.CoreV1().Services(f.Namespace.Name).Get(context.TODO(), serviceName, metav1.GetOptions{}) + status, err := getEgressServiceStatus(f.Namespace.Name, serviceName) if err != nil { return err } - - _, found := svc.Annotations["k8s.ovn.org/egress-service-host"] - if found { - return fmt.Errorf("expected no egress-service-host annotation on service, got: %v", svc.Annotations) + if status.Host != "" { + return fmt.Errorf("expected no host for egress service %s/%s got: %v", f.Namespace.Name, serviceName, status.Host) } return nil @@ -621,12 +728,11 @@ spec: }) // Creates a LoadBalancer service with the given IP and verifies it was set correctly. 
-func createLBServiceWithIngressIP(cs kubernetes.Interface, namespace, name string, protocol v1.IPFamily, annotations, selector map[string]string, port int32, tweak ...func(svc *v1.Service)) *v1.Service { +func createLBServiceWithIngressIP(cs kubernetes.Interface, namespace, name string, protocol v1.IPFamily, selector map[string]string, port int32, tweak ...func(svc *v1.Service)) *v1.Service { svc := &v1.Service{ ObjectMeta: metav1.ObjectMeta{ - Namespace: namespace, - Name: name, - Annotations: annotations, + Namespace: namespace, + Name: name, }, Spec: v1.ServiceSpec{ Selector: selector, @@ -668,6 +774,29 @@ func createLBServiceWithIngressIP(cs kubernetes.Interface, namespace, name strin return svc } +type egressServiceStatus struct { + Host string `json:"host"` +} + +type egressService struct { + Status egressServiceStatus `json:"status,omitempty"` +} + +func getEgressServiceStatus(ns, name string) (egressServiceStatus, error) { + egressService := &egressService{} + egressServiceStdout, err := framework.RunKubectl(ns, "get", "egressservice", "-o", "json", name) + if err != nil { + framework.Logf("Error: failed to get the EgressService object, err: %v", err) + return egressServiceStatus{}, err + } + err = json.Unmarshal([]byte(egressServiceStdout), egressService) + if err != nil { + return egressServiceStatus{}, err + } + + return egressService.Status, nil +} + // Returns the node in charge of the egress service's traffic and its v4/v6 addresses. func getEgressSVCHost(cs kubernetes.Interface, svcNamespace, svcName string) (*v1.Node, string, string) { egressHost := &v1.Node{} @@ -680,9 +809,14 @@ func getEgressSVCHost(cs kubernetes.Interface, svcNamespace, svcName string) (*v return err } - svcEgressHost, found := svc.Annotations["k8s.ovn.org/egress-service-host"] - if !found { - return fmt.Errorf("egress-service-host annotation missing from service, got: %v", svc.Annotations) + egressServiceStatus, err := getEgressServiceStatus(svcNamespace, svcName) + if err != nil { + return err + } + + svcEgressHost := egressServiceStatus.Host + if svcEgressHost == "" { + return fmt.Errorf("egress service %s/%s does not have a host", svcNamespace, svcName) } egressHost, err = cs.CoreV1().Nodes().Get(context.TODO(), svcEgressHost, metav1.GetOptions{}) @@ -690,7 +824,7 @@ func getEgressSVCHost(cs kubernetes.Interface, svcNamespace, svcName string) (*v return err } - _, found = egressHost.Labels[fmt.Sprintf("egress-service.k8s.ovn.org/%s-%s", svc.Namespace, svc.Name)] + _, found := egressHost.Labels[fmt.Sprintf("egress-service.k8s.ovn.org/%s-%s", svc.Namespace, svc.Name)] if !found { return fmt.Errorf("node %s does not have the label for egress service %s/%s, labels: %v", egressHost.Name, svc.Namespace, svc.Name, egressHost.Labels) From 19de9595e2116b98e80805bc265d73765560661d Mon Sep 17 00:00:00 2001 From: Ori Braunshtein Date: Tue, 24 Jan 2023 13:02:14 +0200 Subject: [PATCH 34/90] Update EgressService docs Reflects the change from annotation to a CRD and the new "Network" field Signed-off-by: Ori Braunshtein --- docs/egress-service.md | 135 +++++++++++++++++++++++++---------------- 1 file changed, 84 insertions(+), 51 deletions(-) diff --git a/docs/egress-service.md b/docs/egress-service.md index dedeaba601..026f030ed7 100644 --- a/docs/egress-service.md +++ b/docs/egress-service.md @@ -5,7 +5,10 @@ The Egress Service feature enables the egress traffic of pods backing a LoadBalancer service to exit the cluster using its ingress IP. 
This is useful for external systems that communicate with applications running on the Kubernetes cluster through a LoadBalancer service and expect that the source IP of egress traffic originating from the pods backing the service is identical to the destination IP they use to reach them - i.e the LoadBalancer's ingress IP.
-This functionality can be toggled by annotating a LoadBalancer service, making the source IP of egress packets originating from all of the non host-networked pods that are endpoints of it to be its ingress IP.
+By introducing a new CRD `EgressService`, users can request that the source IP of egress packets originating from all of the pods that are endpoints of a LoadBalancer service would be its ingress IP.
+The CRD is namespace-scoped. The name of the EgressService corresponds to the name of a LoadBalancer Service that should be affected by this functionality. Note that the mapping of EgressService to Kubernetes Service is one-to-one.
+The feature will be supported by both "Shared" and "Local" gateway modes, and the affected traffic will be that going from a pod to a destination outside of the cluster - meaning pod-pod / pod-service / pod-node traffic will not be affected.
+
 Announcing the service externally (for ingress traffic) is handled by a LoadBalancer provider (like MetalLB) and not by OVN-Kubernetes as explained later.

 ## Details
@@ -18,15 +21,19 @@ The egress part is handled by OVN-Kubernetes, which chooses a node that acts as
 When that traffic reaches the node's mgmt port it will use its routing table and iptables before heading out.
 Because of that, it takes care of adding the necessary iptables rules on the selected node to SNAT traffic exiting from these pods to the service's ingress IP.

-These goals are achieved by introducing an annotation for users to set on LoadBalancer services: `k8s.ovn.org/egress-service`, which can be either empty (`'{}'`) or contain a `nodeSelector` field: `'{"nodeSelector":{"matchLabels":{"size": "large"}}}'` that allows limiting the nodes that can be selected to handle the service's traffic.
-By specifying the `nodeSelector` field, only a node whose labels match the specified selectors can be selected for handling the service's traffic as explained earlier.
-By not specifying the `nodeSelector` field any node in the cluster can be chosen to manage the service's traffic.
+These goals are achieved by introducing a new resource `EgressService` for users to create alongside LoadBalancer services, which can be either empty or contain optional fields:
+- `nodeSelector`: allows limiting the nodes that can be selected to handle the service's traffic.
+When present, only a node whose labels match the specified selectors can be selected for handling the service's traffic as explained earlier.
+When the field is not specified, any node in the cluster can be chosen to manage the service's traffic.
 In addition, if the service's `ExternalTrafficPolicy` is set to `Local` an additional constraint is added that only a node that has an endpoint can be selected.
-When a node is selected to handle the service's traffic both the service is annotated with `k8s.ovn.org/egress-service-host=<node-name>` (which is consumed by `ovnkube-node`) and the node is labeled with `egress-service.k8s.ovn.org/<svc-namespace>-<svc-name>: ""`, which can be consumed by a LoadBalancer provider to handle the ingress part.
+- `network`: The network which this service should send egress and corresponding ingress replies to.
+This is typically implemented as VRF mapping, representing a numeric id or string name of a routing table; when omitted, the default host routing is used.
+
+When a node is selected to handle the service's traffic, both the status of the relevant `EgressService` is updated with `host: <node-name>` (which is consumed by `ovnkube-node`) and the node is labeled with `egress-service.k8s.ovn.org/<svc-namespace>-<svc-name>: ""`, which can be consumed by a LoadBalancer provider to handle the ingress part.

 Similarly to the EgressIP feature, once a node is selected it is checked for readiness (TCP/gRPC) to serve traffic every x seconds.
-If a node fails the health check, its allocated services move to another node by removing the `egress-service.k8s.ovn.org/<svc-namespace>-<svc-name>: ""` label from it, removing the logical router policies from the cluster router, resetting the `k8s.ovn.org/egress-service-host=<node-name>` annotation on each of the services and requeuing them - causing a new node to be selected for the service.
+If a node fails the health check, its allocated services move to another node by removing the `egress-service.k8s.ovn.org/<svc-namespace>-<svc-name>: ""` label from it, removing the logical router policies from the cluster router, resetting the status of the relevant `EgressServices` and requeuing them - causing a new node to be selected for the services.
 If the node becomes not ready or its labels no longer match the service's selectors the same re-election process happens.

 The ingress part is handled by a LoadBalancer provider, such as MetalLB, that needs to select the right node (and only it) for announcing the LoadBalancer service (ingress traffic) according to the `egress-service.k8s.ovn.org/<svc-namespace>-<svc-name>: ""` label set by OVN-Kubernetes.
@@ -70,10 +77,10 @@ Notice how the packet exits `ovn-worker`'s eth1 and not breth0, as the packet go

 ## Changes in OVN northbound database and iptables

-The feature is implemented by reacting to events from `Services`, `EndpointSlices` and `Nodes` changes -
+The feature is implemented by reacting to events from `EgressServices`, `Services`, `EndpointSlices` and `Nodes` changes -
 updating OVN's northbound database `Logical_Router_Policy` objects to steer the traffic to the selected node and creating iptables SNAT rules in its `OVN-KUBE-EGRESS-SVC` chain, which is called by the POSTROUTING chain of its nat table.

-We'll see how the related objects are changed once a LoadBalancer is requested to act as an "Egress Service" by annotating it with the `k8s.ovn.org/egress-service` annotation in a Dual-Stack kind cluster.
+We'll see how the related objects are changed once a LoadBalancer is requested to act as an "Egress Service" by creating a corresponding `EgressService` named after it in a Dual-Stack kind cluster.

 We start with a clean cluster:
 ```
@@ -88,7 +95,6 @@ ovn-worker Ready worker
 ovn-worker2 Ready worker

 $ kubectl describe svc demo-svc
 Name: demo-svc
 Namespace: default
-Annotations: <none>
 Type: LoadBalancer
 LoadBalancer Ingress: 5.5.5.5, 5555:5555:5555:5555:5555:5555:5555:5555
 Endpoints: 10.244.0.5:8080,10.244.2.7:8080
@@ -120,26 +126,37 @@ Routing Policies
 At this point nothing related to Egress Services is in place.
 It is worth noting that the "allow" policies (102's) that make sure east-west traffic is not affected for EgressIPs are present here as well - if the EgressIP feature is enabled it takes care of creating them, otherwise the "Egress Service" feature does (sharing the same logic), as we do not want Egress Services to change the behavior of east-west traffic.
Also, the policies created (seen later) for an Egress Service use a higher priority than the EgressIP ones, which means that if a pod belongs to both an EgressIP and an Egress Service the service's ingress IP will be used for the SNAT. -We now request that our service will act as an "Egress Service" by annotating it, with the constraint that only a node with the `"node-role.kubernetes.io/worker": ""` label can be selected to handle its traffic: +We now request that our service "demo-svc" will act as an "Egress Service" by creating a corresponding `EgressService`, with the constraint that only a node with the `"node-role.kubernetes.io/worker": ""` label can be selected to handle its traffic: ``` -$ kubectl annotate svc demo-svc k8s.ovn.org/egress-service='{"nodeSelector":{"matchLabels":{"node-role.kubernetes.io/worker": ""}}}' -service/demo-svc annotated +$ cat egress-service.yaml +apiVersion: k8s.ovn.org/v1 +kind: EgressService +metadata: + name: demo-svc + namespace: default +spec: + nodeSelector: + matchLabels: + node-role.kubernetes.io/worker: "" + +$ kubectl apply -f egress-service.yaml +egressservice.k8s.ovn.org/demo-svc created ``` -Once the service is annotated a node is selected to handle all of its traffic (ingress/egress) as described earlier. -The service is annotated with its name, logical router policies are created on ovn_cluster_router to steer the endpoints' traffic to its mgmt port, SNAT rules are created in its iptables and it is labeled as the node in charge of the service's traffic: +Once the `EgressService` is created a node is selected to handle all of its traffic (ingress/egress) as described earlier. +The `EgressService` status is updated with its name, logical router policies are created on ovn_cluster_router to steer the endpoints' traffic to its mgmt port, SNAT rules are created in its iptables and it is labeled as the node in charge of the service's traffic: -The `k8s.ovn.org/egress-service-host` annotation points to `ovn-worker2`, meaning it was selected to handle the service's traffic: +The status points to `ovn-worker2`, meaning it was selected to handle the service's traffic: ``` -$ kubectl describe svc demo-svc -Name: demo-svc -Namespace: default -Annotations: k8s.ovn.org/egress-service: {"nodeSelector":{"matchLabels":{"node-role.kubernetes.io/worker": ""}}} - k8s.ovn.org/egress-service-host: ovn-worker2 -Type: LoadBalancer -LoadBalancer Ingress: 5.5.5.5, 5555:5555:5555:5555:5555:5555:5555:5555 -Endpoints: 10.244.0.5:8080,10.244.2.7:8080 - fd00:10:244:1::5,fd00:10:244:3::7 +$ kubectl describe egressservice demo-svc +Name: demo-svc +Namespace: default +Spec: + Node Selector: + Match Labels: + node-role.kubernetes.io/worker: "" +Status: + Host: ovn-worker2 ``` A logical router policy is created for each endpoint to steer its egress traffic towards `ovn-worker2`'s mgmt port: @@ -192,17 +209,17 @@ $ docker stop ovn-worker2 ovn-worker2 ``` -The `k8s.ovn.org/egress-service-host` annotation now points to `ovn-worker`: +The status now points to `ovn-worker`: ``` -$ kubectl describe svc demo-svc -Name: demo-svc -Namespace: default -Annotations: k8s.ovn.org/egress-service: {"nodeSelector":{"matchLabels":{"node-role.kubernetes.io/worker": ""}}} - k8s.ovn.org/egress-service-host: ovn-worker -Type: LoadBalancer -LoadBalancer Ingress: 5.5.5.5, 5555:5555:5555:5555:5555:5555:5555:5555 -Endpoints: 10.244.0.5:8080,10.244.2.7:8080 - fd00:10:244:1::5,fd00:10:244:3::7 +$ kubectl describe egressservice demo-svc +Name: demo-svc +Namespace: default +Spec: + Node Selector: + Match 
Labels:
+ node-role.kubernetes.io/worker: ""
+Status:
+ Host: ovn-worker
```

The reroute destination changed to `ovn-worker`'s mgmt port (10.244.1.2 -> 10.244.0.2, fd00:10:244:2::2 -> fd00:10:244:1::2):
@@ -245,20 +262,10 @@ NAME STATUS ROLES
 ovn-worker Ready worker

-Finally, removing the annotation from the service resets the cluster to the point we started from:
+Finally, deleting the `EgressService` resource resets the cluster to the point we started from:
 ```
-$ kubectl annotate svc demo-svc k8s.ovn.org/egress-service-
-service/demo-svc annotated
-```
-
-```
-$ kubectl describe svc demo-svc
-Name: demo-svc
-Namespace: default
-Annotations: <none>
-Type: LoadBalancer
-LoadBalancer Ingress: 5.5.5.5, 5555:5555:5555:5555:5555:5555:5555:5555
-Endpoints: 10.244.0.5:8080,10.244.2.7:8080
+$ kubectl delete egressservice demo-svc
+egressservice.k8s.ovn.org "demo-svc" deleted
 ```

 ```
@@ -300,6 +307,25 @@ $ ip6tables-save | grep EGRESS
 $ kubectl get nodes -l egress-service.k8s.ovn.org/default-demo-svc=""
 No resources found
 ```
+
+### Network
+In addition, an `EgressService` supports a `network` field.
+When it is specified, the relevant `ovnkube-node` takes care of creating additional ip rules on its host.
+Assuming an `EgressService` has `Network: blue`, a ClusterIP of 10.96.135.5 and its endpoints are 10.244.0.3 and 10.244.1.6, the following ip rules will be created on the host:
+
+```none
+$ ip rule list
+5000: from 10.96.135.5 lookup blue
+5000: from 10.244.0.3 lookup blue
+5000: from 10.244.1.6 lookup blue
+```
+
+This causes the egress traffic of the endpoints of an EgressService to be routed via the "blue" routing table.
+An ip rule is also created for the ClusterIP of the service which is needed in order for the return traffic (reply to an external client calling the service) to use the correct table - this is because the packet flow of contacting a LoadBalancer service goes:
+`lb ip -> node -> enter ovn with ClusterIP -> exit ovn with ClusterIP -> exit node with lb ip`
+
+If the routing table does not exist on the host these rules will not be created and an error will be logged.

 ## Usage Example
 While the user does not need to know all of the details of how "Egress Services" work, they need to know that in order for a service to work properly the access to it from outside the cluster (ingress traffic) has to go only through the node labeled with the `egress-service.k8s.ovn.org/<svc-namespace>-<svc-name>: ""` label - i.e the node designated by OVN-Kubernetes to handle all of the service's traffic.
@@ -322,9 +348,7 @@ spec:
 autoAssign: false
 ```

-2. Create the LoadBalancer service. We create it with 2 annotations:
-- `metallb.universe.tf/address-pool` - to explicitly request the IP to be from the `example-pool`.
-- `k8s.ovn.org/egress-service` - to request that all of the endpoints of the service exit the cluster with the service's ingress IP. We also provide a `nodeSelector` so that the traffic exits from a node that matches these selectors.
+2. Create the LoadBalancer service and the corresponding EgressService. We create the service with the `metallb.universe.tf/address-pool` annotation to explicitly request its IP to be from the `example-pool`, and the EgressService with a `nodeSelector` so that the traffic exits from a node that matches these selectors.
```yaml apiVersion: v1 kind: Service @@ -333,7 +357,6 @@ metadata: namespace: some-namespace annotations: metallb.universe.tf/address-pool: example-pool - k8s.ovn.org/egress-service: '{"nodeSelector":{"matchLabels":{"node-role.kubernetes.io/worker": ""}}}' spec: selector: app: example @@ -343,6 +366,16 @@ spec: port: 8080 targetPort: 8080 type: LoadBalancer +--- +apiVersion: k8s.ovn.org/v1 +kind: EgressService +metadata: + name: example-service + namespace: some-namespace +spec: + nodeSelector: + matchLabels: + node-role.kubernetes.io/worker: "" ``` 3. Advertise the service from the node in charge of the service's traffic. So far the service is "broken" - it is not reachable from outside the cluster and if the pods try to send traffic outside it would probably not come back as it is SNATed to an IP which is unknown. From 0bca32e3bdb07f68467df8ffa1730afe749b546a Mon Sep 17 00:00:00 2001 From: Ori Braunshtein Date: Wed, 19 Apr 2023 13:05:20 +0300 Subject: [PATCH 35/90] Add EgressService "Network" E2E Verifying that the "Network" field works properly: Setting a "dummy" custom routing table on all of the nodes (this heavily relies on the environment to be a kind cluster): We create a new routing table with 2 routes to the external container: 1) The one from the default routing table. 2) A blackhole with a higher priority Then in the actual test we first verify that when the pods are using the custom routing table they can't reach the external container, remove the blackhole route and verify that they can reach it now. This shows that they actually use a different routing table than the main one. Signed-off-by: Ori Braunshtein --- test/e2e/egress_services.go | 116 +++++++++++++++++++++++++++++++++++- 1 file changed, 115 insertions(+), 1 deletion(-) diff --git a/test/e2e/egress_services.go b/test/e2e/egress_services.go index 5e1afebd35..e5bb482a7b 100644 --- a/test/e2e/egress_services.go +++ b/test/e2e/egress_services.go @@ -27,6 +27,7 @@ var _ = ginkgo.Describe("Egress Services", func() { externalContainerName = "external-container-for-egress-service" podHTTPPort = 8080 serviceName = "test-egress-service" + customRoutingTable = "100" ) command := []string{"/agnhost", "netexec", fmt.Sprintf("--http-port=%d", podHTTPPort)} @@ -55,10 +56,12 @@ var _ = ginkgo.Describe("Egress Services", func() { ginkgo.By("Creating an external container to send the traffic to/from") externalIPv4, externalIPv6 = createClusterExternalContainer(externalContainerName, agnhostImage, []string{"--privileged", "--network", "kind"}, []string{"netexec", fmt.Sprintf("--http-port=%d", podHTTPPort)}) + }) ginkgo.AfterEach(func() { deleteClusterExternalContainer(externalContainerName) + flushCustomRoutingTableOnNodes(nodes, customRoutingTable) }) ginkgotable.DescribeTable("Should validate pods' egress is SNATed to the LB's ingress ip without selectors", @@ -127,6 +130,50 @@ metadata: // This is to be sure we did not break ingress traffic for the service reachAllServiceBackendsFromExternalContainer(externalContainerName, svcIP, podHTTPPort, pods) + ginkgo.By("Creating the custom network") + setCustomRoutingTableOnNodes(nodes, customRoutingTable, externalIPv4, externalIPv6, protocol == v1.IPv4Protocol) + + ginkgo.By("Updating the resource to contain a Network") + egressServiceConfig = fmt.Sprintf(` +apiVersion: k8s.ovn.org/v1 +kind: EgressService +metadata: + name: ` + serviceName + ` + namespace: ` + f.Namespace.Name + ` +spec: + network: "100" +`) + if err := os.WriteFile(egressServiceYAML, []byte(egressServiceConfig), 0644); 
err != nil { + framework.Failf("Unable to write CRD config to disk: %v", err) + } + framework.RunKubectlOrDie(f.Namespace.Name, "apply", "-f", egressServiceYAML) + + ginkgo.By("Verifying the pods can't reach the external container due to the blackhole in the custom network") + gomega.Consistently(func() error { + for _, pod := range pods { + err := curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, *dstIP, podHTTPPort) + if err != nil && !strings.Contains(err.Error(), "exit code 28") { + return fmt.Errorf("expected err to be a connection timed out due to blackhole, got: %w", err) + } + + if err == nil { + return fmt.Errorf("pod %s managed to reach external client despite blackhole", pod) + } + } + return nil + }, 2*time.Second, 400*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "managed to reach external container despite blackhole") + + ginkgo.By("Removing the blackhole to the external container the pods should be able to reach it with the loadbalancer's ingress ip") + delExternalClientBlackholeFromNodes(nodes, customRoutingTable, externalIPv4, externalIPv6, protocol == v1.IPv4Protocol) + gomega.Consistently(func() error { + for _, pod := range pods { + if err := curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, *dstIP, podHTTPPort); err != nil { + return err + } + } + return nil + }, 2*time.Second, 400*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to reach external container with loadbalancer's ingress ip") + ginkgo.By("Deleting the EgressService the backend pods should exit with their node's IP") framework.RunKubectlOrDie(f.Namespace.Name, "delete", "-f", egressServiceYAML) @@ -856,7 +903,7 @@ func setSVCRouteOnContainer(container, svcIP, v4Via, v6Via string) { // Returns an error if the expectedIP is different than the response. func curlAgnHostClientIPFromPod(namespace, pod, expectedIP, dstIP string, containerPort int) error { dst := net.JoinHostPort(dstIP, fmt.Sprint(containerPort)) - curlCmd := fmt.Sprintf("curl -s --retry-connrefused --retry 5 --max-time 1 http://%s/clientip", dst) + curlCmd := fmt.Sprintf("curl -s --retry-connrefused --retry 3 --max-time 0.5 http://%s/clientip", dst) out, err := framework.RunHostCmd(namespace, pod, curlCmd) if err != nil { return fmt.Errorf("failed to curl agnhost on %s from %s, err: %w", dstIP, pod, err) @@ -891,3 +938,70 @@ func reachAllServiceBackendsFromExternalContainer(container, svcIP string, svcPo framework.ExpectEqual(len(backends), 0, fmt.Sprintf("did not reach all pods from outside, missed: %v", backends)) } + +// Sets the "dummy" custom routing table on all of the nodes (this heavily relies on the environment to be a kind cluster) +// We create a new routing table with 2 routes to the external container: +// 1) The one from the default routing table. +// 2) A blackhole with a higher priority +// Then in the actual test we first verify that when the pods are using the custom routing table they can't reach the external container, +// remove the blackhole route and verify that they can reach it now. This shows that they actually use a different routing table than the main one. +func setCustomRoutingTableOnNodes(nodes []v1.Node, routingTable, externalV4, externalV6 string, useV4 bool) { + for _, node := range nodes { + if useV4 { + setRoutesOnCustomRoutingTable(node.Name, externalV4, routingTable) + continue + } + if externalV6 != "" { + setRoutesOnCustomRoutingTable(node.Name, externalV6, routingTable) + } + } +} + +// Sets the regular+blackhole routes on the nodes to the external container. 
+func setRoutesOnCustomRoutingTable(container, ip, table string) { + type route struct { + Dst string `json:"dst"` + Dev string `json:"dev"` + } + out, err := runCommand(containerRuntime, "exec", container, "ip", "--json", "route", "get", ip) + framework.ExpectNoError(err, fmt.Sprintf("failed to get default route to %s on node %s, out: %s", ip, container, out)) + + routes := []route{} + err = json.Unmarshal([]byte(out), &routes) + framework.ExpectNoError(err, fmt.Sprintf("failed to parse route to %s on node %s", ip, container)) + gomega.Expect(routes).ToNot(gomega.HaveLen(0)) + + routeTo := routes[0] + out, err = runCommand(containerRuntime, "exec", container, "ip", "route", "add", ip, "dev", routeTo.Dev, "table", table, "prio", "100") + framework.ExpectNoError(err, fmt.Sprintf("failed to set route to %s on node %s table %s, out: %s", ip, container, table, out)) + + out, err = runCommand(containerRuntime, "exec", container, "ip", "route", "add", "blackhole", ip, "table", table, "prio", "50") + framework.ExpectNoError(err, fmt.Sprintf("failed to set blackhole route to %s on node %s table %s, out: %s", ip, container, table, out)) +} + +// Removes the blackhole route to the external container on the nodes. +func delExternalClientBlackholeFromNodes(nodes []v1.Node, routingTable, externalV4, externalV6 string, useV4 bool) { + for _, node := range nodes { + if useV4 { + out, err := runCommand(containerRuntime, "exec", node.Name, "ip", "route", "del", "blackhole", externalV4, "table", routingTable) + framework.ExpectNoError(err, fmt.Sprintf("failed to delete blackhole route to %s on node %s table %s, out: %s", externalV4, node.Name, routingTable, out)) + continue + } + out, err := runCommand(containerRuntime, "exec", node.Name, "ip", "route", "del", "blackhole", externalV6, "table", routingTable) + framework.ExpectNoError(err, fmt.Sprintf("failed to delete blackhole route to %s on node %s table %s, out: %s", externalV6, node.Name, routingTable, out)) + } +} + +// Flush the custom routing table from all of the nodes. +func flushCustomRoutingTableOnNodes(nodes []v1.Node, routingTable string) { + for _, node := range nodes { + out, err := runCommand(containerRuntime, "exec", node.Name, "ip", "route", "flush", "table", routingTable) + if err != nil && !strings.Contains(err.Error(), "FIB table does not exist") { + framework.Failf("Unable to flush table %s on node %s: out: %s, err: %v", routingTable, node.Name, out, err) + } + out, err = runCommand(containerRuntime, "exec", node.Name, "ip", "-6", "route", "flush", "table", routingTable) + if err != nil && !strings.Contains(err.Error(), "FIB table does not exist") { + framework.Failf("Unable to flush table %s on node %s: out: %s err: %v", routingTable, node.Name, out, err) + } + } +} From 2e250ebe645752735010259a18f4be657649a4b3 Mon Sep 17 00:00:00 2001 From: Ori Braunshtein Date: Wed, 19 Apr 2023 15:46:14 +0300 Subject: [PATCH 36/90] Egress Service: document known non-SNATed traffic issue Signed-off-by: Ori Braunshtein --- docs/egress-service.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/egress-service.md b/docs/egress-service.md index 026f030ed7..e66508b0b7 100644 --- a/docs/egress-service.md +++ b/docs/egress-service.md @@ -325,6 +325,13 @@ An ip rule is also created for the ClusterIP of the service which is needed in o `lb ip -> node -> enter ovn with ClusterIP -> exit ovn with ClusterIP -> exit node with lb ip` If the routing table does not exist on the host these rules will not be created and an error will be logged. 
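As a hedged aside, a minimal sketch of preparing such a custom table on a node, assuming a hypothetical table named `blue` with an arbitrary unused ID of 200 (the interface and gateway below are placeholders for whatever fits the host):

```
# register the table name so "lookup blue" resolves
echo "200 blue" >> /etc/iproute2/rt_tables

# populate the table, e.g. with a default route via a second interface
ip route add default via 172.18.0.1 dev eth1 table blue

# confirm the table exists and has routes
ip route show table blue
```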
+### TBD: Dealing with non-SNATed traffic
+The host of an Egress Service is often in charge of pods (endpoints) that run on different nodes.
+Due to the fact that ovn-controllers on different nodes apply the changes independently, there is
+a chance that some pod traffic will reach the host before it configures the relevant SNAT iptables rules.
+In that timeframe, the egress traffic from these pods will exit the host with their IP instead of the LB's ingress IP, and it will not be able to return properly because an external client is not aware of a pod's inner IP.
+
+This is currently a known issue for EgressService because we can't leverage the same approach that [EgressIP](./egress-ip.md#Dealing-with-non-SNATed-traffic) currently uses, setting a flow on breth0 - the flow won't be hit because the traffic "exits" OVN when using EgressService (= doesn't hit the host's breth0) as opposed to how EgressIP "keeps everything" inside OVN.

 ## Usage Example

From 389769fa28c276f018c59c758a8522177a6aca22 Mon Sep 17 00:00:00 2001
From: Miguel Duarte Barroso
Date: Fri, 5 May 2023 15:05:17 +0200
Subject: [PATCH 37/90] docs, multi-net policy: provide user documentation

Signed-off-by: Miguel Duarte Barroso
---
 docs/multi-homing.md | 63 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/docs/multi-homing.md b/docs/multi-homing.md
index c1552f07ab..2bd02f2921 100644
--- a/docs/multi-homing.md
+++ b/docs/multi-homing.md
@@ -234,6 +234,69 @@ spec:
 - specifying a static IP address for the pod is only possible when the
   attachment configuration does **not** feature subnets.

+## Multi-network Policies
+OVN-Kubernetes implements native support for
+[multi-networkpolicy](https://github.com/k8snetworkplumbingwg/multi-networkpolicy),
+an API providing
+[network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/)
+features for secondary networks.
+
+To configure pod isolation, the user must:
+- provision a `network-attachment-definition`.
+- provision a `MultiNetworkPolicy` indicating to which secondary networks it
+  applies via the
+  [policy-for](https://github.com/k8snetworkplumbingwg/multi-networkpolicy#policy-for-annotation)
+  annotation.
+
+**NOTE:** the `OVN_MULTI_NETWORK_ENABLE` config flag must be enabled.
+
+Please refer to the following example:
+```yaml
+---
+apiVersion: k8s.cni.cncf.io/v1
+kind: NetworkAttachmentDefinition
+metadata:
+  name: tenant-blue
+spec:
+  config: '{
+        "cniVersion": "0.4.0",
+        "name": "tenant-blue",
+        "netAttachDefName": "default/tenant-blue",
+        "topology": "layer2",
+        "type": "ovn-k8s-cni-overlay",
+        "subnets": "192.168.100.0/24"
+    }'
+---
+apiVersion: k8s.cni.cncf.io/v1beta1
+kind: MultiNetworkPolicy
+metadata:
+  annotations:
+    k8s.v1.cni.cncf.io/policy-for: default/tenant-blue # indicates the net-attach-defs this policy applies to
+  name: allow-ports-same-ns
+spec:
+  podSelector:
+    matchLabels:
+      app: stuff-doer # the policy will **apply** to all pods with this label
+  ingress:
+  - ports:
+    - port: 9000
+      protocol: TCP
+    from:
+    - namespaceSelector:
+        matchLabels:
+          role: trusted # only pods on namespaces with this label will be allowed on port 9000
+  policyTypes:
+  - Ingress
+```
+
+Please note the `MultiNetworkPolicy` has the **exact same** API as the native
+`networking.k8s.io/v1` `NetworkPolicy` object; check its documentation for more
+information.
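Assuming both manifests above are saved to a hypothetical `tenant-blue.yaml`, applying and then inspecting them could look like this sketch (resource names follow the upstream CRDs; adjust to your installation):

```
kubectl apply -f tenant-blue.yaml
kubectl get network-attachment-definitions.k8s.cni.cncf.io tenant-blue
kubectl get multi-networkpolicies.k8s.cni.cncf.io allow-ports-same-ns -o yaml
```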
+
+**Note:** It is currently **required** for the `net-attach-def`s referred to by
+the `k8s.v1.cni.cncf.io/policy-for` annotation to have the `subnets` attribute
+in their `spec.config` defined.
+
 ## Limitations
 OVN-K currently does **not** support:
 - the same attachment configured multiple times in the same pod - i.e.

From f93bbf1218d9d7cfa6ba95e7bc96c4ad153f45a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?=
Date: Wed, 12 Apr 2023 00:22:43 +0000
Subject: [PATCH 38/90] Single interface for network information
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To more easily share network information throughout network controller
subcomponents.

The network information was split between NetInfo (holding the network
name) and NetConfInfo (holding all the other information). One easy
approach would be to have the network name as well in NetConfInfo but
there is no actual reason to have this split as the NetConf origin of
that information is not really relevant to the network controllers and
subcomponents. The only relevant split is with the AddNAD/RemoveNAD
methods as only the NAD and network controllers should be concerned
with them but not the subcomponents.

Unified the NetInfo implementation for the different topologies, which
shared a good portion of code, except for the default network which has
a pretty static implementation.

Had to juggle a bit with the config.ParseClusterSubnets as it requires
some minor functional variations for secondary networks but still good
enough to re-use. Note that per host prefix length is not used in L2
topologies but will be once interconnect is introduced.

Vendored in go-cmp (which was already partially vendored indirectly)
for unordered slice comparisons.
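For context, a minimal sketch of the kind of order-insensitive comparison `go-cmp`'s `cmpopts` enables (the subnet values here are made up):

```go
package main

import (
	"fmt"

	"github.com/google/go-cmp/cmp"
	"github.com/google/go-cmp/cmp/cmpopts"
)

func main() {
	got := []string{"10.128.0.0/14", "fd01::/48"}
	want := []string{"fd01::/48", "10.128.0.0/14"}

	// SortSlices sorts both slices with the given less function before
	// comparing, so element order does not cause a spurious diff.
	less := func(a, b string) bool { return a < b }
	if diff := cmp.Diff(want, got, cmpopts.SortSlices(less)); diff != "" {
		fmt.Printf("unexpected subnets (-want +got):\n%s", diff)
	}
}
```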
Signed-off-by: Jaime Caamaño Ruiz
---
 go-controller/go.mod | 2 +-
 .../pkg/clustermanager/clustermanager.go | 2 +-
 .../network_cluster_controller.go | 4 +-
 .../network_cluster_controller_test.go | 6 +-
 .../secondary_network_cluster_manager.go | 17 +-
 .../secondary_network_unit_test.go | 19 +-
 go-controller/pkg/config/utils.go | 57 +-
 go-controller/pkg/config/utils_test.go | 89 ++-
 .../network_attach_def_controller.go | 69 +-
 .../network_controller_manager.go | 19 +-
 .../node_network_controller_manager.go | 9 +-
 .../node/default_node_network_controller.go | 5 +-
 .../default_node_network_controller_test.go | 3 +-
 .../node/secondary_node_network_controller.go | 4 +-
 .../pkg/ovn/base_network_controller.go | 3 +-
 .../pkg/ovn/base_network_controller_pods.go | 3 +-
 .../pkg/ovn/base_network_controller_policy.go | 4 +-
 ...ase_secondary_layer2_network_controller.go | 6 +-
 .../pkg/ovn/default_network_controller.go | 1 -
 go-controller/pkg/ovn/gress_policy.go | 4 +-
 go-controller/pkg/ovn/gress_policy_test.go | 2 +-
 go-controller/pkg/ovn/multipolicy_test.go | 20 +-
 go-controller/pkg/ovn/ovn_test.go | 13 +-
 go-controller/pkg/ovn/policy_test.go | 12 +-
 .../secondary_layer2_network_controller.go | 17 +-
 .../secondary_layer3_network_controller.go | 21 +-
 .../secondary_localnet_network_controller.go | 18 +-
 .../ovn/zone_interconnect/zone_ic_handler.go | 21 +-
 .../zone_interconnect/zone_ic_handler_test.go | 6 +-
 go-controller/pkg/util/multi_network.go | 589 +++++++-----------
 go-controller/pkg/util/multi_network_test.go | 146 +++--
 .../google/go-cmp/cmp/cmpopts/equate.go | 156 +++++
 .../google/go-cmp/cmp/cmpopts/ignore.go | 206 ++++++
 .../google/go-cmp/cmp/cmpopts/sort.go | 147 +++++
 .../go-cmp/cmp/cmpopts/struct_filter.go | 189 ++++++
 .../google/go-cmp/cmp/cmpopts/xform.go | 36 ++
 go-controller/vendor/modules.txt | 1 +
 37 files changed, 1300 insertions(+), 626 deletions(-)
 create mode 100644 go-controller/vendor/github.com/google/go-cmp/cmp/cmpopts/equate.go
 create mode 100644 go-controller/vendor/github.com/google/go-cmp/cmp/cmpopts/ignore.go
 create mode 100644 go-controller/vendor/github.com/google/go-cmp/cmp/cmpopts/sort.go
 create mode 100644 go-controller/vendor/github.com/google/go-cmp/cmp/cmpopts/struct_filter.go
 create mode 100644 go-controller/vendor/github.com/google/go-cmp/cmp/cmpopts/xform.go

diff --git a/go-controller/go.mod b/go-controller/go.mod
index c0b9b215a5..8a3db3d405 100644
--- a/go-controller/go.mod
+++ b/go-controller/go.mod
@@ -11,6 +11,7 @@ require (
 github.com/containernetworking/cni v1.1.2
 github.com/containernetworking/plugins v1.2.0
 github.com/coreos/go-iptables v0.6.0
+ github.com/google/go-cmp v0.5.9
 github.com/google/uuid v1.3.0
 github.com/gorilla/mux v1.8.0
 github.com/j-keck/arping v1.0.2
@@ -70,7 +71,6 @@ require (
 github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
 github.com/golang/protobuf v1.5.2 // indirect
 github.com/google/gnostic v0.5.7-v3refs // indirect
- github.com/google/go-cmp v0.5.9 // indirect
 github.com/google/gofuzz v1.2.0 // indirect
 github.com/imdario/mergo v0.3.12 // indirect
 github.com/josharian/intern v1.0.0 // indirect
diff --git a/go-controller/pkg/clustermanager/clustermanager.go b/go-controller/pkg/clustermanager/clustermanager.go
index 2d3447ac20..65a53e5e6d 100644
--- a/go-controller/pkg/clustermanager/clustermanager.go
+++ b/go-controller/pkg/clustermanager/clustermanager.go
@@ -43,7 +43,7 @@ type ClusterManager struct {
 func NewClusterManager(ovnClient *util.OVNClusterManagerClientset, wf *factory.WatchFactory, identity
string, wg *sync.WaitGroup, recorder record.EventRecorder) (*ClusterManager, error) { defaultNetClusterController := newNetworkClusterController(ovntypes.DefaultNetworkName, defaultNetworkID, config.Default.ClusterSubnets, - ovnClient, wf, config.HybridOverlay.Enabled, &util.DefaultNetInfo{}, &util.DefaultNetConfInfo{}) + ovnClient, wf, config.HybridOverlay.Enabled, &util.DefaultNetInfo{}) zoneClusterController, err := newZoneClusterController(ovnClient, wf) if err != nil { diff --git a/go-controller/pkg/clustermanager/network_cluster_controller.go b/go-controller/pkg/clustermanager/network_cluster_controller.go index aafef6f234..6596c5d6c6 100644 --- a/go-controller/pkg/clustermanager/network_cluster_controller.go +++ b/go-controller/pkg/clustermanager/network_cluster_controller.go @@ -54,12 +54,11 @@ type networkClusterController struct { hybridOverlaySubnetAllocator *subnetallocator.HostSubnetAllocator util.NetInfo - util.NetConfInfo } func newNetworkClusterController(networkName string, networkID int, clusterSubnets []config.CIDRNetworkEntry, ovnClient *util.OVNClusterManagerClientset, wf *factory.WatchFactory, - enableHybridOverlaySubnetAllocator bool, netInfo util.NetInfo, netConfInfo util.NetConfInfo) *networkClusterController { + enableHybridOverlaySubnetAllocator bool, netInfo util.NetInfo) *networkClusterController { kube := &kube.Kube{ KClient: ovnClient.KubeClient, @@ -83,7 +82,6 @@ func newNetworkClusterController(networkName string, networkID int, clusterSubne hybridOverlaySubnetAllocator: hybridOverlaySubnetAllocator, enableHybridOverlaySubnetAllocator: enableHybridOverlaySubnetAllocator, NetInfo: netInfo, - NetConfInfo: netConfInfo, } ncc.initRetryFramework() diff --git a/go-controller/pkg/clustermanager/network_cluster_controller_test.go b/go-controller/pkg/clustermanager/network_cluster_controller_test.go index 83e54f441c..ddf88a98ed 100644 --- a/go-controller/pkg/clustermanager/network_cluster_controller_test.go +++ b/go-controller/pkg/clustermanager/network_cluster_controller_test.go @@ -76,7 +76,7 @@ var _ = ginkgo.Describe("Network Cluster Controller", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) ncc := newNetworkClusterController(ovntypes.DefaultNetworkName, defaultNetworkID, config.Default.ClusterSubnets, - fakeClient, f, false, &util.DefaultNetInfo{}, &util.DefaultNetConfInfo{}) + fakeClient, f, false, &util.DefaultNetInfo{}) ncc.Start(ctx.Context) defer ncc.Stop() @@ -125,7 +125,7 @@ var _ = ginkgo.Describe("Network Cluster Controller", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) ncc := newNetworkClusterController(ovntypes.DefaultNetworkName, defaultNetworkID, config.Default.ClusterSubnets, - fakeClient, f, false, &util.DefaultNetInfo{}, &util.DefaultNetConfInfo{}) + fakeClient, f, false, &util.DefaultNetInfo{}) ncc.Start(ctx.Context) defer ncc.Stop() @@ -177,7 +177,7 @@ var _ = ginkgo.Describe("Network Cluster Controller", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) ncc := newNetworkClusterController(ovntypes.DefaultNetworkName, defaultNetworkID, config.Default.ClusterSubnets, - fakeClient, f, false, &util.DefaultNetInfo{}, &util.DefaultNetConfInfo{}) + fakeClient, f, false, &util.DefaultNetInfo{}) ncc.Start(ctx.Context) defer ncc.Stop() diff --git a/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go b/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go index f00548a348..cc8c55250e 100644 --- a/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go +++ 
b/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go @@ -92,18 +92,16 @@ func (sncm *secondaryNetworkClusterManager) Stop() { // NewNetworkController implements the networkAttachDefController.NetworkControllerManager // interface function. This function is called by the net-attach-def controller when // a layer2 or layer3 secondary network is created. Layer2 type is not handled here. -func (sncm *secondaryNetworkClusterManager) NewNetworkController(nInfo util.NetInfo, - netConfInfo util.NetConfInfo) (nad.NetworkController, error) { - topoType := netConfInfo.TopologyType() +func (sncm *secondaryNetworkClusterManager) NewNetworkController(nInfo util.NetInfo) (nad.NetworkController, error) { + topoType := nInfo.TopologyType() if topoType == ovntypes.Layer3Topology { networkId, err := sncm.networkIDAllocator.allocateID(nInfo.GetNetworkName()) if err != nil { return nil, fmt.Errorf("failed to create NetworkController for secondary layer3 network %s : %w", nInfo.GetNetworkName(), err) } - layer3NetConfInfo := netConfInfo.(*util.Layer3NetConfInfo) - sncc := newNetworkClusterController(nInfo.GetNetworkName(), networkId, layer3NetConfInfo.ClusterSubnets, - sncm.ovnClient, sncm.watchFactory, false, nInfo, netConfInfo) + sncc := newNetworkClusterController(nInfo.GetNetworkName(), networkId, nInfo.Subnets(), + sncm.ovnClient, sncm.watchFactory, false, nInfo) return sncc, nil } @@ -164,8 +162,7 @@ func (sncm *secondaryNetworkClusterManager) CleanupDeletedNetworks(allController // newDummyNetworkController creates a dummy network controller used to clean up specific network func (sncm *secondaryNetworkClusterManager) newDummyLayer3NetworkController(netName string) nad.NetworkController { - netInfo := util.NewNetInfo(&ovncnitypes.NetConf{NetConf: types.NetConf{Name: netName}, Topology: ovntypes.Layer3Topology}) - layer3NetConfInfo := &util.Layer3NetConfInfo{} - return newNetworkClusterController(netInfo.GetNetworkName(), util.InvalidNetworkID, layer3NetConfInfo.ClusterSubnets, - sncm.ovnClient, sncm.watchFactory, false, netInfo, layer3NetConfInfo) + netInfo, _ := util.NewNetInfo(&ovncnitypes.NetConf{NetConf: types.NetConf{Name: netName}, Topology: ovntypes.Layer3Topology}) + return newNetworkClusterController(netInfo.GetNetworkName(), util.InvalidNetworkID, nil, sncm.ovnClient, sncm.watchFactory, + false, netInfo) } diff --git a/go-controller/pkg/clustermanager/secondary_network_unit_test.go b/go-controller/pkg/clustermanager/secondary_network_unit_test.go index 10301901be..3fe855a100 100644 --- a/go-controller/pkg/clustermanager/secondary_network_unit_test.go +++ b/go-controller/pkg/clustermanager/secondary_network_unit_test.go @@ -88,11 +88,9 @@ var _ = ginkgo.Describe("Secondary Layer3 Cluster Controller Manager", func() { sncm, err := newSecondaryNetworkClusterManager(fakeClient, f, record.NewFakeRecorder(0)) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - netInfo := util.NewNetInfo(&ovncnitypes.NetConf{NetConf: types.NetConf{Name: "blue"}, Topology: ovntypes.Layer3Topology}) - blueNetSubnets, err := config.ParseClusterSubnetEntries("192.168.0.0/16/24") + netInfo, err := util.NewNetInfo(&ovncnitypes.NetConf{NetConf: types.NetConf{Name: "blue"}, Topology: ovntypes.Layer3Topology, Subnets: "192.168.0.0/16/24"}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - layer3NetConfInfo := &util.Layer3NetConfInfo{ClusterSubnets: blueNetSubnets} - nc, err := sncm.NewNetworkController(netInfo, layer3NetConfInfo) + nc, err := sncm.NewNetworkController(netInfo) 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) gomega.Expect(nc).NotTo(gomega.BeNil()) nc.Start(ctx.Context) @@ -156,9 +154,9 @@ var _ = ginkgo.Describe("Secondary Layer3 Cluster Controller Manager", func() { sncm, err := newSecondaryNetworkClusterManager(fakeClient, f, record.NewFakeRecorder(0)) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - netInfo := util.NewNetInfo(&ovncnitypes.NetConf{NetConf: types.NetConf{Name: "blue"}, Topology: ovntypes.Layer2Topology}) - layer2NetConfInfo := &util.Layer2NetConfInfo{} - nc, err := sncm.NewNetworkController(netInfo, layer2NetConfInfo) + netInfo, err := util.NewNetInfo(&ovncnitypes.NetConf{NetConf: types.NetConf{Name: "blue"}, Topology: ovntypes.Layer2Topology}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + nc, err := sncm.NewNetworkController(netInfo) gomega.Expect(err).To(gomega.Equal(nad.ErrNetworkControllerTopologyNotManaged)) gomega.Expect(nc).To(gomega.BeNil()) @@ -224,10 +222,9 @@ var _ = ginkgo.Describe("Secondary Layer3 Cluster Controller Manager", func() { // there could be a race in updating the node annotations with the fakeclient. // fakeclient will not return an error in such cases to trigger retry by RetryOnConflict. // So testing the cleanup one at a time. - netInfo := util.NewNetInfo(&ovncnitypes.NetConf{NetConf: types.NetConf{Name: "blue"}, Topology: ovntypes.Layer3Topology}) - layer3NetConfInfo := &util.Layer3NetConfInfo{} - oc := newNetworkClusterController(netInfo.GetNetworkName(), util.InvalidNetworkID, layer3NetConfInfo.ClusterSubnets, - sncm.ovnClient, sncm.watchFactory, false, netInfo, layer3NetConfInfo) + netInfo, err := util.NewNetInfo(&ovncnitypes.NetConf{NetConf: types.NetConf{Name: "blue"}, Topology: ovntypes.Layer3Topology}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + oc := newNetworkClusterController(netInfo.GetNetworkName(), util.InvalidNetworkID, nil, sncm.ovnClient, sncm.watchFactory, false, netInfo) nadControllers := []nad.NetworkController{oc} err = sncm.CleanupDeletedNetworks(nadControllers) diff --git a/go-controller/pkg/config/utils.go b/go-controller/pkg/config/utils.go index 2c91935cfb..e00c687e8c 100644 --- a/go-controller/pkg/config/utils.go +++ b/go-controller/pkg/config/utils.go @@ -34,16 +34,27 @@ type CIDRNetworkEntry struct { HostSubnetLength int } -// ParseClusterSubnetEntries returns the parsed set of CIDRNetworkEntries passed by the user on the command line -// These entries define the clusters network space by specifying a set of CIDR and netmasks the SDN can allocate -// addresses from. -func ParseClusterSubnetEntries(clusterSubnetCmd string) ([]CIDRNetworkEntry, error) { +func (c CIDRNetworkEntry) String() string { + return fmt.Sprintf("%s/%d", c.CIDR.String(), c.HostSubnetLength) +} + +// ParseClusterSubnetEntriesWithDefaults returns the parsed set of +// CIDRNetworkEntries. These entries define a network space by specifying a set +// of CIDR and netmasks the SDN can allocate addresses from including how that +// network space is partitioned for each of the cluster nodes. When no host +// specific prefix length is specified, the provided ones are assumed as +// default. The host specific prefix length is validated to be greater than the +// overall subnet length. When 0 is specified as default host specific prefix +// length, no host specific prefix length is allowed or validated. 
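To make the defaulting and validation rules described above concrete, here is a minimal, hypothetical usage sketch of the function declared immediately below (the subnet strings and default host lengths are invented for illustration and are not part of this patch):

package main

import (
	"fmt"

	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config"
)

func main() {
	// Entries without an explicit third component pick up the supplied
	// defaults: /23 per node for IPv4 and /64 per node for IPv6 here.
	entries, err := config.ParseClusterSubnetEntriesWithDefaults("10.128.0.0/14,fd00::/48", 23, 64)
	if err != nil {
		panic(err)
	}
	for _, e := range entries {
		// CIDRNetworkEntry now implements Stringer, printing e.g. 10.128.0.0/14/23 and fd00::/48/64.
		fmt.Println(e)
	}

	// A default of 0 disables per-node host lengths entirely, so an explicit
	// third component is rejected as not properly formatted.
	if _, err := config.ParseClusterSubnetEntriesWithDefaults("100.64.0.0/16/24", 0, 0); err != nil {
		fmt.Println("rejected:", err)
	}

	// The existing ParseClusterSubnetEntries keeps its historical behaviour by
	// delegating with defaults of 24 (IPv4) and 64 (IPv6).
	_, _ = config.ParseClusterSubnetEntries("10.132.0.0/14")
}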
+func ParseClusterSubnetEntriesWithDefaults(clusterSubnetCmd string, ipv4HostLength, ipv6HostLength int) ([]CIDRNetworkEntry, error) { var parsedClusterList []CIDRNetworkEntry clusterEntriesList := strings.Split(clusterSubnetCmd, ",") - for _, clusterEntry := range clusterEntriesList { - var parsedClusterEntry CIDRNetworkEntry + ipv4HostLengthAllowed := ipv4HostLength != 0 + ipv6HostLengthAllowed := ipv6HostLength != 0 + for _, clusterEntry := range clusterEntriesList { + clusterEntry := strings.TrimSpace(clusterEntry) splitClusterEntry := strings.Split(clusterEntry, "/") if len(splitClusterEntry) < 2 || len(splitClusterEntry) > 3 { @@ -51,35 +62,43 @@ func ParseClusterSubnetEntries(clusterSubnetCmd string) ([]CIDRNetworkEntry, err } var err error + var parsedClusterEntry CIDRNetworkEntry _, parsedClusterEntry.CIDR, err = net.ParseCIDR(fmt.Sprintf("%s/%s", splitClusterEntry[0], splitClusterEntry[1])) if err != nil { return nil, err } ipv6 := utilnet.IsIPv6(parsedClusterEntry.CIDR.IP) + hostLengthAllowed := (ipv6 && ipv6HostLengthAllowed) || (!ipv6 && ipv4HostLengthAllowed) + entryMaskLength, _ := parsedClusterEntry.CIDR.Mask.Size() if len(splitClusterEntry) == 3 { + if !hostLengthAllowed { + return nil, fmt.Errorf("CIDR %q not properly formatted", clusterEntry) + } tmp, err := strconv.Atoi(splitClusterEntry[2]) if err != nil { return nil, err } parsedClusterEntry.HostSubnetLength = tmp - - if ipv6 && parsedClusterEntry.HostSubnetLength != 64 { - return nil, fmt.Errorf("IPv6 only supports /64 host subnets") - } } else { if ipv6 { - parsedClusterEntry.HostSubnetLength = 64 + parsedClusterEntry.HostSubnetLength = ipv6HostLength } else { // default for backward compatibility - parsedClusterEntry.HostSubnetLength = 24 + parsedClusterEntry.HostSubnetLength = ipv4HostLength } } - if parsedClusterEntry.HostSubnetLength <= entryMaskLength { - return nil, fmt.Errorf("cannot use a host subnet length mask shorter than or equal to the cluster subnet mask. "+ - "host subnet length: %d, cluster subnet length: %d", parsedClusterEntry.HostSubnetLength, entryMaskLength) + if hostLengthAllowed { + if ipv6 && ipv6HostLengthAllowed && parsedClusterEntry.HostSubnetLength != 64 { + return nil, fmt.Errorf("IPv6 only supports /64 host subnets") + } + + if parsedClusterEntry.HostSubnetLength <= entryMaskLength { + return nil, fmt.Errorf("cannot use a host subnet length mask shorter than or equal to the cluster subnet mask. "+ + "host subnet length: %d, cluster subnet length: %d", parsedClusterEntry.HostSubnetLength, entryMaskLength) + } } parsedClusterList = append(parsedClusterList, parsedClusterEntry) @@ -92,6 +111,14 @@ func ParseClusterSubnetEntries(clusterSubnetCmd string) ([]CIDRNetworkEntry, err return parsedClusterList, nil } +// ParseClusterSubnetEntries returns the parsed set of +// CIDRNetworkEntries. If not specified, it assumes a default host specific +// prefix length of 24 or 64 bits for ipv4 and ipv6 respectively. +func ParseClusterSubnetEntries(clusterSubnetCmd string) ([]CIDRNetworkEntry, error) { + // default to 24 bits host specific prefix length for backward compatibility + return ParseClusterSubnetEntriesWithDefaults(clusterSubnetCmd, 24, 64) +} + // ParseFlowCollectors returns the parsed set of HostPorts passed by the user on the command line // These entries define the flow collectors OVS will send flow metadata by using NetFlow/SFlow/IPFIX. 
func ParseFlowCollectors(flowCollectors string) ([]HostPort, error) { diff --git a/go-controller/pkg/config/utils_test.go b/go-controller/pkg/config/utils_test.go index 54efa0ad51..c0367b9d07 100644 --- a/go-controller/pkg/config/utils_test.go +++ b/go-controller/pkg/config/utils_test.go @@ -9,10 +9,13 @@ import ( func TestParseClusterSubnetEntries(t *testing.T) { tests := []struct { - name string - cmdLineArg string - clusterNetworks []CIDRNetworkEntry - expectedErr bool + name string + cmdLineArg string + clusterNetworks []CIDRNetworkEntry + withDefaultHostSubnetLength bool + defaultIPv4HostSubnetLength int + defaultIPv6HostSubnetLength int + expectedErr bool }{ { name: "Single CIDR correctly formatted", @@ -107,11 +110,85 @@ func TestParseClusterSubnetEntries(t *testing.T) { clusterNetworks: nil, expectedErr: true, }, + { + name: "Two CIDRs correctly formatted with spaces", + cmdLineArg: "10.132.0.0/26/28, 10.133.0.0/26/28", + clusterNetworks: []CIDRNetworkEntry{ + {CIDR: ovntest.MustParseIPNet("10.132.0.0/26"), HostSubnetLength: 28}, + {CIDR: ovntest.MustParseIPNet("10.133.0.0/26"), HostSubnetLength: 28}, + }, + expectedErr: false, + }, + { + name: "Single IPv4 CIDR with default host subnet length", + cmdLineArg: "10.132.0.0/26", + withDefaultHostSubnetLength: true, + defaultIPv4HostSubnetLength: 28, + clusterNetworks: []CIDRNetworkEntry{{CIDR: ovntest.MustParseIPNet("10.132.0.0/26"), HostSubnetLength: 28}}, + expectedErr: false, + }, + { + name: "Single IPv4 CIDR with invalid default host subnet length", + cmdLineArg: "10.132.0.0/26", + withDefaultHostSubnetLength: true, + defaultIPv4HostSubnetLength: 26, + expectedErr: true, + }, + { + name: "Single IPv4 CIDR no host subnet length allowed or validated", + cmdLineArg: "10.132.0.1/32", + withDefaultHostSubnetLength: true, + defaultIPv4HostSubnetLength: 0, + clusterNetworks: []CIDRNetworkEntry{{CIDR: ovntest.MustParseIPNet("10.132.0.1/32")}}, + expectedErr: false, + }, + { + name: "Single IPv4 CIDR no host subnet length allowed", + cmdLineArg: "10.132.0.0/26/28", + withDefaultHostSubnetLength: true, + defaultIPv4HostSubnetLength: 0, + expectedErr: true, + }, + { + name: "Single IPv6 CIDR with default host subnet length", + cmdLineArg: "fda6::/48", + withDefaultHostSubnetLength: true, + defaultIPv6HostSubnetLength: 64, + clusterNetworks: []CIDRNetworkEntry{{CIDR: ovntest.MustParseIPNet("fda6::/48"), HostSubnetLength: 64}}, + expectedErr: false, + }, + { + name: "Single IPv6 CIDR with invalid default host subnet length", + cmdLineArg: "fda6::/64", + withDefaultHostSubnetLength: true, + defaultIPv6HostSubnetLength: 48, + expectedErr: true, + }, + { + name: "Single IPv6 CIDR no host subnet length allowed or validated", + cmdLineArg: "fda6::1/128", + withDefaultHostSubnetLength: true, + defaultIPv6HostSubnetLength: 0, + clusterNetworks: []CIDRNetworkEntry{{CIDR: ovntest.MustParseIPNet("fda6::1/128")}}, + expectedErr: false, + }, + { + name: "Single IPv6 CIDR no host subnet length allowed", + cmdLineArg: "fda6::/48/64", + withDefaultHostSubnetLength: true, + defaultIPv6HostSubnetLength: 0, + expectedErr: true, + }, } for _, tc := range tests { - - parsedList, err := ParseClusterSubnetEntries(tc.cmdLineArg) + var err error + var parsedList []CIDRNetworkEntry + if tc.withDefaultHostSubnetLength { + parsedList, err = ParseClusterSubnetEntriesWithDefaults(tc.cmdLineArg, tc.defaultIPv4HostSubnetLength, tc.defaultIPv6HostSubnetLength) + } else { + parsedList, err = ParseClusterSubnetEntries(tc.cmdLineArg) + } if err != nil && !tc.expectedErr { 
t.Errorf("Test case \"%s\" expected no errors, got %v", tc.name, err) } diff --git a/go-controller/pkg/network-attach-def-controller/network_attach_def_controller.go b/go-controller/pkg/network-attach-def-controller/network_attach_def_controller.go index 8b68881198..9827f5b4ed 100644 --- a/go-controller/pkg/network-attach-def-controller/network_attach_def_controller.go +++ b/go-controller/pkg/network-attach-def-controller/network_attach_def_controller.go @@ -52,7 +52,7 @@ type BaseNetworkController interface { type NetworkController interface { BaseNetworkController - CompareNetConf(util.NetConfInfo) bool + CompareNetInfo(util.BasicNetInfo) bool AddNAD(nadName string) DeleteNAD(nadName string) HasNAD(nadName string) bool @@ -63,7 +63,7 @@ type NetworkController interface { // NetworkControllerManager manages all network controllers type NetworkControllerManager interface { - NewNetworkController(netInfo util.NetInfo, netConfInfo util.NetConfInfo) (NetworkController, error) + NewNetworkController(netInfo util.NetInfo) (NetworkController, error) CleanupDeletedNetworks(allControllers []NetworkController) error } @@ -73,11 +73,6 @@ type networkNADInfo struct { isStarted bool } -type nadNetConfInfo struct { - util.NetConfInfo - netName string -} - type NetAttachDefinitionController struct { name string recorder record.EventRecorder @@ -90,8 +85,8 @@ type NetAttachDefinitionController struct { stopChan chan struct{} wg sync.WaitGroup - // key is nadName, value is nadNetConfInfo - perNADNetConfInfo *syncmap.SyncMap[*nadNetConfInfo] + // key is nadName, value is BasicNetInfo + perNADNetInfo *syncmap.SyncMap[util.BasicNetInfo] // controller for all networks, key is netName of net-attach-def, value is networkNADInfo // this map is updated either at the very beginning of network controller manager when initializing the // default controller or when net-attach-def is added/deleted. All these are serialized by syncmap lock @@ -119,7 +114,7 @@ func NewNetAttachDefinitionController(name string, ncm NetworkControllerManager, queue: workqueue.NewNamedRateLimitingQueue(rateLimiter, "net-attach-def"), loopPeriod: time.Second, stopChan: make(chan struct{}), - perNADNetConfInfo: syncmap.NewSyncMap[*nadNetConfInfo](), + perNADNetInfo: syncmap.NewSyncMap[util.BasicNetInfo](), perNetworkNADInfo: syncmap.NewSyncMap[*networkNADInfo](), } _, err := netAttachDefInformer.Informer().AddEventHandler( @@ -354,7 +349,6 @@ func (nadController *NetAttachDefinitionController) getAllNetworkControllers() [ // Non-retriable errors (configuration error etc.) are just logged, and the function immediately returns nil. 
func (nadController *NetAttachDefinitionController) AddNetAttachDef(ncm NetworkControllerManager, netattachdef *nettypes.NetworkAttachmentDefinition, doStart bool) error { - var netConfInfo util.NetConfInfo var nInfo util.NetInfo var err, invalidNADErr error var netName string @@ -362,7 +356,7 @@ func (nadController *NetAttachDefinitionController) AddNetAttachDef(ncm NetworkC netAttachDefName := util.GetNADName(netattachdef.Namespace, netattachdef.Name) klog.Infof("%s: Add net-attach-def %s", nadController.name, netAttachDefName) - nInfo, netConfInfo, invalidNADErr = util.ParseNADInfo(netattachdef) + nInfo, invalidNADErr = util.ParseNADInfo(netattachdef) if invalidNADErr == nil { netName = nInfo.GetNetworkName() if netName == types.DefaultNetworkName { @@ -370,24 +364,21 @@ func (nadController *NetAttachDefinitionController) AddNetAttachDef(ncm NetworkC } } - return nadController.perNADNetConfInfo.DoWithLock(netAttachDefName, func(nadName string) error { - nadNci, loaded := nadController.perNADNetConfInfo.LoadOrStore(nadName, &nadNetConfInfo{ - NetConfInfo: netConfInfo, - netName: netName, - }) + return nadController.perNADNetInfo.DoWithLock(netAttachDefName, func(nadName string) error { + nadNci, loaded := nadController.perNADNetInfo.LoadOrStore(nadName, nInfo) if !loaded { // first time to process this nad if invalidNADErr != nil { // invalid nad, nothing to do klog.Warningf("%s: net-attach-def %s is first seen and is invalid: %v", nadController.name, nadName, invalidNADErr) - nadController.perNADNetConfInfo.Delete(nadName) + nadController.perNADNetInfo.Delete(nadName) return nil } klog.V(5).Infof("%s: net-attach-def %s network %s first seen", nadController.name, nadName, netName) - err = nadController.addNADToController(ncm, nadName, nInfo, netConfInfo, doStart) + err = nadController.addNADToController(ncm, nadName, nInfo, doStart) if err != nil { klog.Errorf("%s: Failed to add net-attach-def %s to network %s: %v", nadController.name, nadName, netName, err) - nadController.perNADNetConfInfo.Delete(nadName) + nadController.perNADNetInfo.Delete(nadName) return err } } else { @@ -395,11 +386,7 @@ func (nadController *NetAttachDefinitionController) AddNetAttachDef(ncm NetworkC nadUpdated := false if invalidNADErr != nil { nadUpdated = true - } else if nadNci.netName != netName { - // netconf network name changed - klog.V(5).Infof("%s: net-attach-def %s network name %s has changed", nadController.name, netName, nadNci.netName) - nadUpdated = true - } else if !nadNci.CompareNetConf(netConfInfo) { + } else if !nadNci.CompareNetInfo(nInfo) { // netconf spec changed klog.V(5).Infof("%s: net-attach-def %s spec has changed", nadController.name, nadName) nadUpdated = true @@ -410,7 +397,7 @@ func (nadController *NetAttachDefinitionController) AddNetAttachDef(ncm NetworkC if !doStart { return nil } - err = nadController.addNADToController(ncm, nadName, nInfo, netConfInfo, doStart) + err = nadController.addNADToController(ncm, nadName, nInfo, doStart) if err != nil { klog.Errorf("%s: Failed to add net-attach-def %s to network %s: %v", nadController.name, nadName, netName, err) return err @@ -420,23 +407,24 @@ func (nadController *NetAttachDefinitionController) AddNetAttachDef(ncm NetworkC if nadUpdated { klog.V(5).Infof("%s: net-attach-def %s network %s updated", nadController.name, nadName, netName) // delete the NAD from the old network first - err := nadController.deleteNADFromController(nadNci.netName, nadName) + oldNetName := nadNci.GetNetworkName() + err := 
nadController.deleteNADFromController(oldNetName, nadName) if err != nil { - klog.Errorf("%s: Failed to delete net-attach-def %s from network %s: %v", nadController.name, nadName, nadNci.netName, err) + klog.Errorf("%s: Failed to delete net-attach-def %s from network %s: %v", nadController.name, nadName, oldNetName, err) return err } - nadController.perNADNetConfInfo.Delete(nadName) + nadController.perNADNetInfo.Delete(nadName) } if invalidNADErr != nil { klog.Warningf("%s: net-attach-def %s is invalid: %v", nadController.name, nadName, invalidNADErr) return nil } klog.V(5).Infof("%s: Add updated net-attach-def %s to network %s", nadController.name, nadName, netName) - nadController.perNADNetConfInfo.LoadOrStore(nadName, &nadNetConfInfo{NetConfInfo: netConfInfo, netName: netName}) - err = nadController.addNADToController(ncm, nadName, nInfo, netConfInfo, doStart) + nadController.perNADNetInfo.LoadOrStore(nadName, nInfo) + err = nadController.addNADToController(ncm, nadName, nInfo, doStart) if err != nil { klog.Errorf("%s: Failed to add net-attach-def %s to network %s: %v", nadController.name, nadName, netName, err) - nadController.perNADNetConfInfo.Delete(nadName) + nadController.perNADNetInfo.Delete(nadName) return err } return nil @@ -449,24 +437,25 @@ func (nadController *NetAttachDefinitionController) AddNetAttachDef(ncm NetworkC // is the last NAD of the network func (nadController *NetAttachDefinitionController) DeleteNetAttachDef(netAttachDefName string) error { klog.Infof("%s: Delete net-attach-def %s", nadController.name, netAttachDefName) - return nadController.perNADNetConfInfo.DoWithLock(netAttachDefName, func(nadName string) error { - existingNadNetConfInfo, found := nadController.perNADNetConfInfo.Load(nadName) + return nadController.perNADNetInfo.DoWithLock(netAttachDefName, func(nadName string) error { + existingNadNetConfInfo, found := nadController.perNADNetInfo.Load(nadName) if !found { klog.V(5).Infof("%s: net-attach-def %s not found for removal", nadController.name, nadName) return nil } - err := nadController.deleteNADFromController(existingNadNetConfInfo.netName, nadName) + netName := existingNadNetConfInfo.GetNetworkName() + err := nadController.deleteNADFromController(netName, nadName) if err != nil { - klog.Errorf("%s: Failed to delete net-attach-def %s from network %s: %v", nadController.name, nadName, existingNadNetConfInfo.netName, err) + klog.Errorf("%s: Failed to delete net-attach-def %s from network %s: %v", nadController.name, nadName, netName, err) return err } - nadController.perNADNetConfInfo.Delete(nadName) + nadController.perNADNetInfo.Delete(nadName) return nil }) } func (nadController *NetAttachDefinitionController) addNADToController(ncm NetworkControllerManager, nadName string, - nInfo util.NetInfo, netConfInfo util.NetConfInfo, doStart bool) (err error) { + nInfo util.NetInfo, doStart bool) (err error) { var oc NetworkController var nadExists, isStarted bool @@ -486,7 +475,7 @@ func (nadController *NetAttachDefinitionController) addNADToController(ncm Netwo }() // first NAD for this network, create controller klog.V(5).Infof("%s: First net-attach-def %s of network %s added, create network controller", nadController.name, nadName, networkName) - oc, err = ncm.NewNetworkController(nInfo, netConfInfo) + oc, err = ncm.NewNetworkController(nInfo) if err != nil { return err } @@ -498,7 +487,7 @@ func (nadController *NetAttachDefinitionController) addNADToController(ncm Netwo isStarted = nni.isStarted _, nadExists = nni.nadNames[nadName] - if 
!oc.CompareNetConf(netConfInfo) { + if !oc.CompareNetInfo(nInfo) { if nadExists { // this should not happen, continue to start the existing controller if requested return fmt.Errorf("%s: net-attach-def %s netconf spec changed, should not happen", nadController.name, networkName) diff --git a/go-controller/pkg/network-controller-manager/network_controller_manager.go b/go-controller/pkg/network-controller-manager/network_controller_manager.go index b87f348bf4..d04ab7b926 100644 --- a/go-controller/pkg/network-controller-manager/network_controller_manager.go +++ b/go-controller/pkg/network-controller-manager/network_controller_manager.go @@ -60,26 +60,25 @@ type networkControllerManager struct { nadController *nad.NetAttachDefinitionController } -func (cm *networkControllerManager) NewNetworkController(nInfo util.NetInfo, - netConfInfo util.NetConfInfo) (nad.NetworkController, error) { +func (cm *networkControllerManager) NewNetworkController(nInfo util.NetInfo) (nad.NetworkController, error) { cnci, err := cm.newCommonNetworkControllerInfo() if err != nil { return nil, fmt.Errorf("failed to create network controller info %w", err) } - topoType := netConfInfo.TopologyType() + topoType := nInfo.TopologyType() switch topoType { case ovntypes.Layer3Topology: - return ovn.NewSecondaryLayer3NetworkController(cnci, nInfo, netConfInfo, nil), nil + return ovn.NewSecondaryLayer3NetworkController(cnci, nInfo), nil case ovntypes.Layer2Topology: if config.OVNKubernetesFeature.EnableInterconnect { return nil, fmt.Errorf("topology type %s not supported when Interconnect feature is enabled", topoType) } - return ovn.NewSecondaryLayer2NetworkController(cnci, nInfo, netConfInfo, nil), nil + return ovn.NewSecondaryLayer2NetworkController(cnci, nInfo), nil case ovntypes.LocalnetTopology: if config.OVNKubernetesFeature.EnableInterconnect { return nil, fmt.Errorf("topology type %s not supported when Interconnect feature is enabled", topoType) } - return ovn.NewSecondaryLocalnetNetworkController(cnci, nInfo, netConfInfo, nil), nil + return ovn.NewSecondaryLocalnetNetworkController(cnci, nInfo), nil } return nil, fmt.Errorf("topology type %s not supported", topoType) } @@ -90,14 +89,14 @@ func (cm *networkControllerManager) newDummyNetworkController(topoType, netName if err != nil { return nil, fmt.Errorf("failed to create network controller info %w", err) } - netInfo := util.NewNetInfo(&ovncnitypes.NetConf{NetConf: types.NetConf{Name: netName}, Topology: topoType}) + netInfo, _ := util.NewNetInfo(&ovncnitypes.NetConf{NetConf: types.NetConf{Name: netName}, Topology: topoType}) switch topoType { case ovntypes.Layer3Topology: - return ovn.NewSecondaryLayer3NetworkController(cnci, netInfo, &util.Layer3NetConfInfo{}, nil), nil + return ovn.NewSecondaryLayer3NetworkController(cnci, netInfo), nil case ovntypes.Layer2Topology: - return ovn.NewSecondaryLayer2NetworkController(cnci, netInfo, &util.Layer2NetConfInfo{}, nil), nil + return ovn.NewSecondaryLayer2NetworkController(cnci, netInfo), nil case ovntypes.LocalnetTopology: - return ovn.NewSecondaryLocalnetNetworkController(cnci, netInfo, &util.LocalnetNetConfInfo{}, nil), nil + return ovn.NewSecondaryLocalnetNetworkController(cnci, netInfo), nil } return nil, fmt.Errorf("topology type %s not supported", topoType) } diff --git a/go-controller/pkg/network-controller-manager/node_network_controller_manager.go b/go-controller/pkg/network-controller-manager/node_network_controller_manager.go index 34997ed2ff..2a0b4db0ed 100644 --- 
a/go-controller/pkg/network-controller-manager/node_network_controller_manager.go +++ b/go-controller/pkg/network-controller-manager/node_network_controller_manager.go @@ -39,13 +39,12 @@ type nodeNetworkControllerManager struct { nadController *nad.NetAttachDefinitionController } -// NewNetworkController create secondary node network controllers for the given NetInfo and NetConfInfo -func (ncm *nodeNetworkControllerManager) NewNetworkController(nInfo util.NetInfo, - netConfInfo util.NetConfInfo) (nad.NetworkController, error) { - topoType := netConfInfo.TopologyType() +// NewNetworkController create secondary node network controllers for the given NetInfo +func (ncm *nodeNetworkControllerManager) NewNetworkController(nInfo util.NetInfo) (nad.NetworkController, error) { + topoType := nInfo.TopologyType() switch topoType { case ovntypes.Layer3Topology, ovntypes.Layer2Topology, ovntypes.LocalnetTopology: - return node.NewSecondaryNodeNetworkController(ncm.newCommonNetworkControllerInfo(), nInfo, netConfInfo), nil + return node.NewSecondaryNodeNetworkController(ncm.newCommonNetworkControllerInfo(), nInfo), nil } return nil, fmt.Errorf("topology type %s not supported", topoType) } diff --git a/go-controller/pkg/node/default_node_network_controller.go b/go-controller/pkg/node/default_node_network_controller.go index dd61bad28a..1e87d34dcc 100644 --- a/go-controller/pkg/node/default_node_network_controller.go +++ b/go-controller/pkg/node/default_node_network_controller.go @@ -54,9 +54,9 @@ type CommonNodeNetworkControllerInfo struct { // BaseNodeNetworkController structure per-network fields and network specific configuration type BaseNodeNetworkController struct { CommonNodeNetworkControllerInfo - // per controller nad/netconf name information + + // network information util.NetInfo - util.NetConfInfo // podNADToDPUCDMap tracks the NAD/DPU_ConnectionDetails mapping for all NADs that each pod requests. // Key is pod.UUID; value is nadToDPUCDMap (of map[string]*util.DPUConnectionDetails). 
Key of nadToDPUCDMap @@ -115,7 +115,6 @@ func newDefaultNodeNetworkController(cnnci *CommonNodeNetworkControllerInfo, sto return &DefaultNodeNetworkController{ BaseNodeNetworkController: BaseNodeNetworkController{ CommonNodeNetworkControllerInfo: *cnnci, - NetConfInfo: &util.DefaultNetConfInfo{}, NetInfo: &util.DefaultNetInfo{}, stopChan: stopChan, wg: wg, diff --git a/go-controller/pkg/node/default_node_network_controller_test.go b/go-controller/pkg/node/default_node_network_controller_test.go index a644acf2f7..a139d05bb9 100644 --- a/go-controller/pkg/node/default_node_network_controller_test.go +++ b/go-controller/pkg/node/default_node_network_controller_test.go @@ -63,8 +63,7 @@ var _ = Describe("Node", func() { name: nodeName, Kube: kubeMock, }, - NetConfInfo: &util.DefaultNetConfInfo{}, - NetInfo: &util.DefaultNetInfo{}, + NetInfo: &util.DefaultNetInfo{}, }, } diff --git a/go-controller/pkg/node/secondary_node_network_controller.go b/go-controller/pkg/node/secondary_node_network_controller.go index 0f0d977cf0..c53ec67f01 100644 --- a/go-controller/pkg/node/secondary_node_network_controller.go +++ b/go-controller/pkg/node/secondary_node_network_controller.go @@ -20,12 +20,10 @@ type SecondaryNodeNetworkController struct { // NewSecondaryNodeNetworkController creates a new OVN controller for creating logical network // infrastructure and policy for default l3 network -func NewSecondaryNodeNetworkController(cnnci *CommonNodeNetworkControllerInfo, netInfo util.NetInfo, - netconfInfo util.NetConfInfo) *SecondaryNodeNetworkController { +func NewSecondaryNodeNetworkController(cnnci *CommonNodeNetworkControllerInfo, netInfo util.NetInfo) *SecondaryNodeNetworkController { return &SecondaryNodeNetworkController{ BaseNodeNetworkController: BaseNodeNetworkController{ CommonNodeNetworkControllerInfo: *cnnci, - NetConfInfo: netconfInfo, NetInfo: netInfo, stopChan: make(chan struct{}), wg: &sync.WaitGroup{}, diff --git a/go-controller/pkg/ovn/base_network_controller.go b/go-controller/pkg/ovn/base_network_controller.go index ee0fedad7d..afe40a776d 100644 --- a/go-controller/pkg/ovn/base_network_controller.go +++ b/go-controller/pkg/ovn/base_network_controller.go @@ -76,9 +76,8 @@ type BaseNetworkController struct { // controllerName should be used to identify objects owned by given controller in the db controllerName string - // per controller NAD/netconf name information + // network information util.NetInfo - util.NetConfInfo // retry framework for pods retryPods *ovnretry.RetryFramework diff --git a/go-controller/pkg/ovn/base_network_controller_pods.go b/go-controller/pkg/ovn/base_network_controller_pods.go index 9061688874..87e28806b5 100644 --- a/go-controller/pkg/ovn/base_network_controller_pods.go +++ b/go-controller/pkg/ovn/base_network_controller_pods.go @@ -395,8 +395,7 @@ func (bnc *BaseNetworkController) addRoutesGatewayIP(pod *kapi.Pod, network *nad return err } gatewayIPnet := util.GetNodeGatewayIfAddr(nodeSubnet) - layer3NetConfInfo := bnc.NetConfInfo.(*util.Layer3NetConfInfo) - for _, clusterSubnet := range layer3NetConfInfo.ClusterSubnets { + for _, clusterSubnet := range bnc.Subnets() { if isIPv6 == utilnet.IsIPv6CIDR(clusterSubnet.CIDR) { podAnnotation.Routes = append(podAnnotation.Routes, util.PodRoute{ Dest: clusterSubnet.CIDR, diff --git a/go-controller/pkg/ovn/base_network_controller_policy.go b/go-controller/pkg/ovn/base_network_controller_policy.go index faad1641e8..57ef06c578 100644 --- a/go-controller/pkg/ovn/base_network_controller_policy.go +++ 
b/go-controller/pkg/ovn/base_network_controller_policy.go @@ -922,7 +922,7 @@ func (bnc *BaseNetworkController) createNetworkPolicy(policy *knet.NetworkPolicy for i, ingressJSON := range policy.Spec.Ingress { klog.V(5).Infof("Network policy ingress is %+v", ingressJSON) - ingress := newGressPolicy(knet.PolicyTypeIngress, i, policy.Namespace, policy.Name, bnc.controllerName, statelessNetPol, bnc.NetConfInfo) + ingress := newGressPolicy(knet.PolicyTypeIngress, i, policy.Namespace, policy.Name, bnc.controllerName, statelessNetPol, bnc.NetInfo) // append ingress policy to be able to cleanup created address set // see cleanupNetworkPolicy for details np.ingressPolicies = append(np.ingressPolicies, ingress) @@ -948,7 +948,7 @@ func (bnc *BaseNetworkController) createNetworkPolicy(policy *knet.NetworkPolicy for i, egressJSON := range policy.Spec.Egress { klog.V(5).Infof("Network policy egress is %+v", egressJSON) - egress := newGressPolicy(knet.PolicyTypeEgress, i, policy.Namespace, policy.Name, bnc.controllerName, statelessNetPol, bnc.NetConfInfo) + egress := newGressPolicy(knet.PolicyTypeEgress, i, policy.Namespace, policy.Name, bnc.controllerName, statelessNetPol, bnc.NetInfo) // append ingress policy to be able to cleanup created address set // see cleanupNetworkPolicy for details np.egressPolicies = append(np.egressPolicies, egress) diff --git a/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go b/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go index cb7ae2874a..74af5e3a1b 100644 --- a/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go +++ b/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go @@ -9,6 +9,7 @@ import ( iputils "github.com/containernetworking/plugins/pkg/ip" mnpapi "github.com/k8snetworkplumbingwg/multi-networkpolicy/pkg/apis/k8s.cni.cncf.io/v1beta1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdbops" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics" @@ -271,7 +272,7 @@ func (oc *BaseSecondaryLayer2NetworkController) Run() error { return nil } -func (oc *BaseSecondaryLayer2NetworkController) InitializeLogicalSwitch(switchName string, clusterSubnets []*net.IPNet, +func (oc *BaseSecondaryLayer2NetworkController) InitializeLogicalSwitch(switchName string, clusterSubnets []config.CIDRNetworkEntry, excludeSubnets []*net.IPNet) (*nbdb.LogicalSwitch, error) { logicalSwitch := nbdb.LogicalSwitch{ Name: switchName, @@ -282,7 +283,8 @@ func (oc *BaseSecondaryLayer2NetworkController) InitializeLogicalSwitch(switchNa logicalSwitch.ExternalIDs[types.TopologyVersionExternalID] = strconv.Itoa(oc.topologyVersion) hostSubnets := make([]*net.IPNet, 0, len(clusterSubnets)) - for _, subnet := range clusterSubnets { + for _, clusterSubnet := range clusterSubnets { + subnet := clusterSubnet.CIDR hostSubnets = append(hostSubnets, subnet) if utilnet.IsIPv6CIDR(subnet) { logicalSwitch.OtherConfig = map[string]string{"ipv6_prefix": subnet.IP.String()} diff --git a/go-controller/pkg/ovn/default_network_controller.go b/go-controller/pkg/ovn/default_network_controller.go index 1ed857d6b8..52bca289b9 100644 --- a/go-controller/pkg/ovn/default_network_controller.go +++ b/go-controller/pkg/ovn/default_network_controller.go @@ -174,7 +174,6 @@ func newDefaultNetworkControllerCommon(cnci *CommonNetworkControllerInfo, BaseNetworkController: BaseNetworkController{ CommonNetworkControllerInfo: *cnci, 
controllerName: DefaultNetworkControllerName, - NetConfInfo: &util.DefaultNetConfInfo{}, NetInfo: &util.DefaultNetInfo{}, lsManager: lsm.NewLogicalSwitchManager(), logicalPortCache: newPortCache(defaultStopChan), diff --git a/go-controller/pkg/ovn/gress_policy.go b/go-controller/pkg/ovn/gress_policy.go index 0330c25d60..cfb8883f33 100644 --- a/go-controller/pkg/ovn/gress_policy.go +++ b/go-controller/pkg/ovn/gress_policy.go @@ -83,8 +83,8 @@ func (pp *portPolicy) getL4Match() (string, error) { return foundProtocol, nil } -func newGressPolicy(policyType knet.PolicyType, idx int, namespace, name, controllerName string, isNetPolStateless bool, netConfInfo util.NetConfInfo) *gressPolicy { - ipv4Mode, ipv6Mode := netConfInfo.IPMode() +func newGressPolicy(policyType knet.PolicyType, idx int, namespace, name, controllerName string, isNetPolStateless bool, netInfo util.BasicNetInfo) *gressPolicy { + ipv4Mode, ipv6Mode := netInfo.IPMode() return &gressPolicy{ controllerName: controllerName, policyNamespace: namespace, diff --git a/go-controller/pkg/ovn/gress_policy_test.go b/go-controller/pkg/ovn/gress_policy_test.go index 6e099b0a41..82a0cfdd9e 100644 --- a/go-controller/pkg/ovn/gress_policy_test.go +++ b/go-controller/pkg/ovn/gress_policy_test.go @@ -112,7 +112,7 @@ func TestGetMatchFromIPBlock(t *testing.T) { for _, tc := range testcases { gressPolicy := newGressPolicy(knet.PolicyTypeIngress, 5, "testing", "test", - DefaultNetworkControllerName, false, &util.DefaultNetConfInfo{}) + DefaultNetworkControllerName, false, &util.DefaultNetInfo{}) for _, ipBlock := range tc.ipBlocks { gressPolicy.addIPBlock(ipBlock) } diff --git a/go-controller/pkg/ovn/multipolicy_test.go b/go-controller/pkg/ovn/multipolicy_test.go index 8134868baf..2f339f5634 100644 --- a/go-controller/pkg/ovn/multipolicy_test.go +++ b/go-controller/pkg/ovn/multipolicy_test.go @@ -19,7 +19,6 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/libovsdb" - libovsdbtest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/libovsdb" ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -147,11 +146,11 @@ func (p testPod) populateSecondaryNetworkLogicalSwitchCache(fakeOvn *FakeOVN, oc case ovntypes.Layer2Topology: uuid := getLogicalSwitchUUID(fakeOvn.controller.nbClient, ocInfo.bnc.GetNetworkScopedName(ovntypes.OVNLayer2Switch)) subnet := ocInfo.bnc.Subnets()[0] - err = ocInfo.bnc.lsManager.AddSwitch(ocInfo.bnc.GetNetworkScopedName(ovntypes.OVNLayer2Switch), uuid, []*net.IPNet{ovntest.MustParseIPNet(subnet)}) + err = ocInfo.bnc.lsManager.AddSwitch(ocInfo.bnc.GetNetworkScopedName(ovntypes.OVNLayer2Switch), uuid, []*net.IPNet{subnet.CIDR}) case ovntypes.LocalnetTopology: uuid := getLogicalSwitchUUID(fakeOvn.controller.nbClient, ocInfo.bnc.GetNetworkScopedName(ovntypes.OVNLayer2Switch)) subnet := ocInfo.bnc.Subnets()[0] - err = ocInfo.bnc.lsManager.AddSwitch(ocInfo.bnc.GetNetworkScopedName(ovntypes.OVNLocalnetSwitch), uuid, []*net.IPNet{ovntest.MustParseIPNet(subnet)}) + err = ocInfo.bnc.lsManager.AddSwitch(ocInfo.bnc.GetNetworkScopedName(ovntypes.OVNLocalnetSwitch), uuid, []*net.IPNet{subnet.CIDR}) } gomega.Expect(err).NotTo(gomega.HaveOccurred()) } @@ -219,12 +218,12 @@ func getExpectedDataPodsAndSwitchesForSecondaryNetwork(fakeOvn *FakeOVN, pods [] } func getMultiPolicyData(networkPolicy *knet.NetworkPolicy, 
localPortUUIDs []string, peerNamespaces []string, - tcpPeerPorts []int32, netInfo util.NetInfo) []libovsdbtest.TestData { + tcpPeerPorts []int32, netInfo util.NetInfo) []libovsdb.TestData { return getPolicyDataHelper(networkPolicy, localPortUUIDs, peerNamespaces, tcpPeerPorts, "", false, false, netInfo) } -func getMultiDefaultDenyData(networkPolicy *knet.NetworkPolicy, ports []string, netInfo util.NetInfo) []libovsdbtest.TestData { +func getMultiDefaultDenyData(networkPolicy *knet.NetworkPolicy, ports []string, netInfo util.NetInfo) []libovsdb.TestData { policyTypeIngress, policyTypeEgress := getPolicyType(networkPolicy) return getDefaultDenyDataHelper(networkPolicy.Namespace, policyTypeIngress, policyTypeEgress, ports, "", "", netInfo) @@ -298,7 +297,8 @@ var _ = ginkgo.Describe("OVN MultiNetworkPolicy Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) netconf, err := util.ParseNetConf(nad) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - netInfo = util.NewNetInfo(netconf) + netInfo, err = util.NewNetInfo(netconf) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) ginkgo.AfterEach(func() { @@ -368,9 +368,11 @@ var _ = ginkgo.Describe("OVN MultiNetworkPolicy Operations", func() { } if namespaces != nil { err = ocInfo.bnc.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) } if pods != nil { err = ocInfo.bnc.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) } err = ocInfo.bnc.WatchMultiNetworkPolicy() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -406,7 +408,7 @@ var _ = ginkgo.Describe("OVN MultiNetworkPolicy Operations", func() { Get(context.TODO(), mpolicy.Name, metav1.GetOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ocInfo, _ := fakeOvn.secondaryControllers[secondaryNetworkName] + ocInfo := fakeOvn.secondaryControllers[secondaryNetworkName] ocInfo.asf.EventuallyExpectEmptyAddressSetExist(namespaceName1) ocInfo.asf.EventuallyExpectEmptyAddressSetExist(namespaceName2) @@ -429,7 +431,7 @@ var _ = ginkgo.Describe("OVN MultiNetworkPolicy Operations", func() { app.Action = func(ctx *cli.Context) error { namespace1 := *newNamespace(namespaceName1) nPodTest := getTestPod(namespace1.Name, nodeName) - nPodTest.addNetwork(secondaryNetworkName, util.GetNADName(nad.Namespace, nad.Name), "", "", "", "10.1.1.2", "0a:58:0a:01:01:02") + nPodTest.addNetwork(secondaryNetworkName, util.GetNADName(nad.Namespace, nad.Name), "", "", "", "10.1.1.1", "0a:58:0a:01:01:01") networkPolicy := getPortNetworkPolicy(netPolicyName1, namespace1.Name, labelName, labelVal, portNum) startOvn(initialDB, []v1.Namespace{namespace1}, nil, nil, []nettypes.NetworkAttachmentDefinition{*nad}, []testPod{nPodTest}, map[string]string{labelName: labelVal}) @@ -464,7 +466,7 @@ var _ = ginkgo.Describe("OVN MultiNetworkPolicy Operations", func() { Get(context.TODO(), mpolicy.Name, metav1.GetOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ocInfo, _ := fakeOvn.secondaryControllers[secondaryNetworkName] + ocInfo := fakeOvn.secondaryControllers[secondaryNetworkName] portInfo := nPodTest.getNetworkPortInfo(secondaryNetworkName, util.GetNADName(nad.Namespace, nad.Name)) gomega.Expect(portInfo).NotTo(gomega.Equal(nil)) ocInfo.asf.ExpectAddressSetWithIPs(namespaceName1, []string{portInfo.podIP}) diff --git a/go-controller/pkg/ovn/ovn_test.go b/go-controller/pkg/ovn/ovn_test.go index 06b2a4c7ee..0fabcad241 100644 --- a/go-controller/pkg/ovn/ovn_test.go +++ b/go-controller/pkg/ovn/ovn_test.go @@ -306,12 +306,12 @@ func (o *FakeOVN) 
NewSecondaryNetworkController(netattachdef *nettypes.NetworkAt var ok bool nadName := util.GetNADName(netattachdef.Namespace, netattachdef.Name) - nInfo, netConfInfo, err := util.ParseNADInfo(netattachdef) + nInfo, err := util.ParseNADInfo(netattachdef) if err != nil { return err } netName := nInfo.GetNetworkName() - topoType := netConfInfo.TopologyType() + topoType := nInfo.TopologyType() ocInfo, ok = o.secondaryControllers[netName] if !ok { nbZoneFailed := false @@ -351,13 +351,16 @@ func (o *FakeOVN) NewSecondaryNetworkController(netattachdef *nettypes.NetworkAt switch topoType { case types.Layer3Topology: - l3Controller := NewSecondaryLayer3NetworkController(cnci, nInfo, netConfInfo, asf) + l3Controller := NewSecondaryLayer3NetworkController(cnci, nInfo) + l3Controller.addressSetFactory = asf secondaryController = &l3Controller.BaseSecondaryNetworkController case types.Layer2Topology: - l2Controller := NewSecondaryLayer2NetworkController(cnci, nInfo, netConfInfo, asf) + l2Controller := NewSecondaryLayer2NetworkController(cnci, nInfo) + l2Controller.addressSetFactory = asf secondaryController = &l2Controller.BaseSecondaryNetworkController case types.LocalnetTopology: - localnetController := NewSecondaryLocalnetNetworkController(cnci, nInfo, netConfInfo, asf) + localnetController := NewSecondaryLocalnetNetworkController(cnci, nInfo) + localnetController.addressSetFactory = asf secondaryController = &localnetController.BaseSecondaryNetworkController default: return fmt.Errorf("topoloty type %s not supported", topoType) diff --git a/go-controller/pkg/ovn/policy_test.go b/go-controller/pkg/ovn/policy_test.go index cf3ad60656..88ab8c8f65 100644 --- a/go-controller/pkg/ovn/policy_test.go +++ b/go-controller/pkg/ovn/policy_test.go @@ -45,7 +45,6 @@ func getFakeController(controllerName string) *DefaultNetworkController { BaseNetworkController: BaseNetworkController{ controllerName: controllerName, NetInfo: &util.DefaultNetInfo{}, - NetConfInfo: &util.DefaultNetConfInfo{}, }, } return controller @@ -80,11 +79,10 @@ func newNetworkPolicy(name, namespace string, podSelector metav1.LabelSelector, return policy } -func getFakeBaseController(netInfo util.NetInfo, netConfInfo util.NetConfInfo) *BaseNetworkController { +func getFakeBaseController(netInfo util.NetInfo) *BaseNetworkController { return &BaseNetworkController{ controllerName: netInfo.GetNetworkName() + "-network-controller", NetInfo: netInfo, - NetConfInfo: netConfInfo, } } @@ -92,7 +90,7 @@ func getFakeBaseController(netInfo util.NetInfo, netConfInfo util.NetConfInfo) * // and egress func getDefaultDenyDataHelper(namespace string, policyTypeIngress, policyTypeEgress bool, ports []string, denyLogSeverity nbdb.ACLSeverity, staleNetpolName string, netInfo util.NetInfo) []libovsdb.TestData { - fakeController := getFakeBaseController(netInfo, nil) + fakeController := getFakeBaseController(netInfo) egressPGName := fakeController.defaultDenyPortGroupName(namespace, egressDefaultDenySuffix) shouldBeLogged := denyLogSeverity != "" aclIDs := fakeController.getDefaultDenyPolicyACLIDs(namespace, aclEgress, defaultDenyACL) @@ -265,7 +263,7 @@ func getMultinetNsAddrSetHashNames(ns, controllerName string) (string, string) { func getGressACLs(gressIdx int, namespace, policyName string, peerNamespaces []string, tcpPeerPorts []int32, peers []knet.NetworkPolicyPeer, logSeverity nbdb.ACLSeverity, policyType knet.PolicyType, stale, statelessNetPol bool, netInfo util.NetInfo) []*nbdb.ACL { - fakeController := getFakeBaseController(netInfo, nil) + 
fakeController := getFakeBaseController(netInfo) pgName, _ := fakeController.getNetworkPolicyPGName(namespace, policyName) controllerName := netInfo.GetNetworkName() + "-network-controller" shouldBeLogged := logSeverity != "" @@ -411,7 +409,7 @@ func getPolicyDataHelper(networkPolicy *knet.NetworkPolicy, localPortUUIDs []str lsps = append(lsps, &nbdb.LogicalSwitchPort{UUID: uuid}) } - fakeController := getFakeBaseController(netInfo, nil) + fakeController := getFakeBaseController(netInfo) pgName, readableName := fakeController.getNetworkPolicyPGName(networkPolicy.Namespace, networkPolicy.Name) pg := fakeController.buildPortGroup( pgName, @@ -2139,7 +2137,7 @@ var _ = ginkgo.Describe("OVN NetworkPolicy Low-Level Operations", func() { config.IPv6Mode = false asIDs := getPodSelectorAddrSetDbIDs("test_name", DefaultNetworkControllerName) gp := newGressPolicy(knet.PolicyTypeIngress, 0, "testing", "policy", controllerName, - false, &util.DefaultNetConfInfo{}) + false, &util.DefaultNetInfo{}) gp.hasPeerSelector = true gp.addPeerAddressSets(addressset.GetHashNamesForAS(asIDs)) diff --git a/go-controller/pkg/ovn/secondary_layer2_network_controller.go b/go-controller/pkg/ovn/secondary_layer2_network_controller.go index ce738a84d3..4e5a8c4f85 100644 --- a/go-controller/pkg/ovn/secondary_layer2_network_controller.go +++ b/go-controller/pkg/ovn/secondary_layer2_network_controller.go @@ -2,12 +2,13 @@ package ovn import ( "context" + "sync" + addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" lsm "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/logical_switch_manager" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/syncmap" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" - "sync" "k8s.io/klog/v2" ) @@ -19,21 +20,18 @@ type SecondaryLayer2NetworkController struct { } // NewSecondaryLayer2NetworkController create a new OVN controller for the given secondary layer2 nad -func NewSecondaryLayer2NetworkController(cnci *CommonNetworkControllerInfo, netInfo util.NetInfo, - netconfInfo util.NetConfInfo, addressSetFactory addressset.AddressSetFactory) *SecondaryLayer2NetworkController { +func NewSecondaryLayer2NetworkController(cnci *CommonNetworkControllerInfo, netInfo util.NetInfo) *SecondaryLayer2NetworkController { + stopChan := make(chan struct{}) - ipv4Mode, ipv6Mode := netconfInfo.IPMode() - if addressSetFactory == nil { - addressSetFactory = addressset.NewOvnAddressSetFactory(cnci.nbClient, ipv4Mode, ipv6Mode) - } + ipv4Mode, ipv6Mode := netInfo.IPMode() + addressSetFactory := addressset.NewOvnAddressSetFactory(cnci.nbClient, ipv4Mode, ipv6Mode) oc := &SecondaryLayer2NetworkController{ BaseSecondaryLayer2NetworkController{ BaseSecondaryNetworkController: BaseSecondaryNetworkController{ BaseNetworkController: BaseNetworkController{ CommonNetworkControllerInfo: *cnci, controllerName: netInfo.GetNetworkName() + "-network-controller", - NetConfInfo: netconfInfo, NetInfo: netInfo, lsManager: lsm.NewL2SwitchManager(), logicalPortCache: newPortCache(stopChan), @@ -76,8 +74,7 @@ func (oc *SecondaryLayer2NetworkController) Cleanup(netName string) error { func (oc *SecondaryLayer2NetworkController) Init() error { switchName := oc.GetNetworkScopedName(types.OVNLayer2Switch) - layer2NetConfInfo := oc.NetConfInfo.(*util.Layer2NetConfInfo) - _, err := oc.InitializeLogicalSwitch(switchName, layer2NetConfInfo.ClusterSubnets, layer2NetConfInfo.ExcludeSubnets) + _, err := oc.InitializeLogicalSwitch(switchName, 
oc.Subnets(), oc.ExcludeSubnets()) return err } diff --git a/go-controller/pkg/ovn/secondary_layer3_network_controller.go b/go-controller/pkg/ovn/secondary_layer3_network_controller.go index 0ce503d097..2f381aef6a 100644 --- a/go-controller/pkg/ovn/secondary_layer3_network_controller.go +++ b/go-controller/pkg/ovn/secondary_layer3_network_controller.go @@ -5,7 +5,6 @@ import ( "fmt" "net" "reflect" - "strings" "sync" "time" @@ -253,24 +252,22 @@ type SecondaryLayer3NetworkController struct { } // NewSecondaryLayer3NetworkController create a new OVN controller for the given secondary layer3 NAD -func NewSecondaryLayer3NetworkController(cnci *CommonNetworkControllerInfo, netInfo util.NetInfo, - netconfInfo util.NetConfInfo, addressSetFactory addressset.AddressSetFactory) *SecondaryLayer3NetworkController { +func NewSecondaryLayer3NetworkController(cnci *CommonNetworkControllerInfo, netInfo util.NetInfo) *SecondaryLayer3NetworkController { + stopChan := make(chan struct{}) - ipv4Mode, ipv6Mode := netconfInfo.IPMode() + ipv4Mode, ipv6Mode := netInfo.IPMode() var zoneICHandler *zoneic.ZoneInterconnectHandler if config.OVNKubernetesFeature.EnableInterconnect { zoneICHandler = zoneic.NewZoneInterconnectHandler(netInfo, cnci.nbClient, cnci.sbClient) } - // controllerName must be unique to identify db object owned by given controller - if addressSetFactory == nil { - addressSetFactory = addressset.NewOvnAddressSetFactory(cnci.nbClient, ipv4Mode, ipv6Mode) - } + + addressSetFactory := addressset.NewOvnAddressSetFactory(cnci.nbClient, ipv4Mode, ipv6Mode) + oc := &SecondaryLayer3NetworkController{ BaseSecondaryNetworkController: BaseSecondaryNetworkController{ BaseNetworkController: BaseNetworkController{ CommonNetworkControllerInfo: *cnci, controllerName: netInfo.GetNetworkName() + "-network-controller", - NetConfInfo: netconfInfo, NetInfo: netInfo, lsManager: lsm.NewLogicalSwitchManager(), logicalPortCache: newPortCache(stopChan), @@ -617,11 +614,7 @@ func (oc *SecondaryLayer3NetworkController) syncNodes(nodes []interface{}) error return fmt.Errorf("failed to get node logical switches which have other-config set for network %s: %v", oc.GetNetworkName(), err) } for _, nodeSwitch := range nodeSwitches { - if !strings.HasPrefix(nodeSwitch.Name, oc.GetPrefix()) { - klog.Errorf("Node switch name %s unexpected, expect prefixed with %s", nodeSwitch.Name, oc.GetPrefix()) - continue - } - nodeName := strings.Trim(nodeSwitch.Name, oc.GetPrefix()) + nodeName := oc.RemoveNetworkScopeFromName(nodeSwitch.Name) if !foundNodes.Has(nodeName) { if err := oc.deleteNode(nodeName); err != nil { return fmt.Errorf("failed to delete node:%s, err:%v", nodeName, err) diff --git a/go-controller/pkg/ovn/secondary_localnet_network_controller.go b/go-controller/pkg/ovn/secondary_localnet_network_controller.go index f157c36591..ba51023448 100644 --- a/go-controller/pkg/ovn/secondary_localnet_network_controller.go +++ b/go-controller/pkg/ovn/secondary_localnet_network_controller.go @@ -21,21 +21,18 @@ type SecondaryLocalnetNetworkController struct { } // NewSecondaryLocalnetNetworkController create a new OVN controller for the given secondary localnet NAD -func NewSecondaryLocalnetNetworkController(cnci *CommonNetworkControllerInfo, netInfo util.NetInfo, - netconfInfo util.NetConfInfo, addressSetFactory addressset.AddressSetFactory) *SecondaryLocalnetNetworkController { +func NewSecondaryLocalnetNetworkController(cnci *CommonNetworkControllerInfo, netInfo util.NetInfo) *SecondaryLocalnetNetworkController { + stopChan := make(chan 
struct{}) - ipv4Mode, ipv6Mode := netconfInfo.IPMode() - if addressSetFactory == nil { - addressSetFactory = addressset.NewOvnAddressSetFactory(cnci.nbClient, ipv4Mode, ipv6Mode) - } + ipv4Mode, ipv6Mode := netInfo.IPMode() + addressSetFactory := addressset.NewOvnAddressSetFactory(cnci.nbClient, ipv4Mode, ipv6Mode) oc := &SecondaryLocalnetNetworkController{ BaseSecondaryLayer2NetworkController{ BaseSecondaryNetworkController: BaseSecondaryNetworkController{ BaseNetworkController: BaseNetworkController{ CommonNetworkControllerInfo: *cnci, controllerName: netInfo.GetNetworkName() + "-network-controller", - NetConfInfo: netconfInfo, NetInfo: netInfo, lsManager: lsm.NewL2SwitchManager(), logicalPortCache: newPortCache(stopChan), @@ -78,9 +75,8 @@ func (oc *SecondaryLocalnetNetworkController) Cleanup(netName string) error { func (oc *SecondaryLocalnetNetworkController) Init() error { switchName := oc.GetNetworkScopedName(types.OVNLocalnetSwitch) - localnetNetConfInfo := oc.NetConfInfo.(*util.LocalnetNetConfInfo) - logicalSwitch, err := oc.InitializeLogicalSwitch(switchName, localnetNetConfInfo.ClusterSubnets, localnetNetConfInfo.ExcludeSubnets) + logicalSwitch, err := oc.InitializeLogicalSwitch(switchName, oc.Subnets(), oc.ExcludeSubnets()) if err != nil { return err } @@ -96,8 +92,8 @@ func (oc *SecondaryLocalnetNetworkController) Init() error { "network_name": oc.GetNetworkScopedName(types.LocalNetBridgeName), }, } - if localnetNetConfInfo.VLANID != 0 { - intVlanID := localnetNetConfInfo.VLANID + intVlanID := int(oc.Vlan()) + if intVlanID != 0 { logicalSwitchPort.TagRequest = &intVlanID } diff --git a/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler.go b/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler.go index 45c3b13d02..0ee426913a 100644 --- a/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler.go +++ b/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler.go @@ -124,8 +124,8 @@ func NewZoneInterconnectHandler(nInfo util.NetInfo, nbClient, sbClient libovsdbc sbClient: sbClient, } - zic.networkClusterRouterName = zic.getNetworkScopedName(types.OVNClusterRouter) - zic.networkTransitSwitchName = zic.getNetworkScopedName(types.TransitSwitch) + zic.networkClusterRouterName = zic.GetNetworkScopedName(types.OVNClusterRouter) + zic.networkTransitSwitchName = zic.GetNetworkScopedName(types.TransitSwitch) return zic } @@ -284,7 +284,7 @@ func (zic *ZoneInterconnectHandler) createLocalZoneNodeResources(node *corev1.No } // Connect transit switch to the cluster router by creating a pair of logical switch port - logical router port - logicalRouterPortName := zic.getNetworkScopedName(types.RouterToTransitSwitchPrefix + node.Name) + logicalRouterPortName := zic.GetNetworkScopedName(types.RouterToTransitSwitchPrefix + node.Name) logicalRouterPort := nbdb.LogicalRouterPort{ Name: logicalRouterPortName, MAC: transitRouterPortMac.String(), @@ -310,7 +310,7 @@ func (zic *ZoneInterconnectHandler) createLocalZoneNodeResources(node *corev1.No externalIDs := map[string]string{ "node": node.Name, } - err = zic.addNodeLogicalSwitchPort(zic.networkTransitSwitchName, zic.getNetworkScopedName(types.TransitSwitchToRouterPrefix+node.Name), + err = zic.addNodeLogicalSwitchPort(zic.networkTransitSwitchName, zic.GetNetworkScopedName(types.TransitSwitchToRouterPrefix+node.Name), lportTypeRouter, []string{lportTypeRouterAddr}, lspOptions, externalIDs) if err != nil { return err @@ -374,7 +374,7 @@ func (zic *ZoneInterconnectHandler) createRemoteZoneNodeResources(node *corev1.N "node": node.Name, } - 
remotePortName := zic.getNetworkScopedName(types.TransitSwitchToRouterPrefix + node.Name) + remotePortName := zic.GetNetworkScopedName(types.TransitSwitchToRouterPrefix + node.Name) if err := zic.addNodeLogicalSwitchPort(zic.networkTransitSwitchName, remotePortName, lportTypeRemote, []string{remotePortAddr}, lspOptions, externalIDs); err != nil { return err } @@ -439,7 +439,7 @@ func (zic *ZoneInterconnectHandler) cleanupNode(nodeName string) error { func (zic *ZoneInterconnectHandler) cleanupNodeClusterRouterPort(nodeName string) error { lrp := nbdb.LogicalRouterPort{ - Name: zic.getNetworkScopedName(types.RouterToTransitSwitchPrefix + nodeName), + Name: zic.GetNetworkScopedName(types.RouterToTransitSwitchPrefix + nodeName), } logicalRouterPort, err := libovsdbops.GetLogicalRouterPort(zic.nbClient, &lrp) if err != nil { @@ -463,7 +463,7 @@ func (zic *ZoneInterconnectHandler) cleanupNodeTransitSwitchPort(nodeName string Name: zic.networkTransitSwitchName, } logicalSwitchPort := &nbdb.LogicalSwitchPort{ - Name: zic.getNetworkScopedName(types.TransitSwitchToRouterPrefix + nodeName), + Name: zic.GetNetworkScopedName(types.TransitSwitchToRouterPrefix + nodeName), } if err := libovsdbops.DeleteLogicalSwitchPorts(zic.nbClient, logicalSwitch, logicalSwitchPort); err != nil { @@ -600,13 +600,6 @@ func (zic *ZoneInterconnectHandler) deleteLocalNodeStaticRoutes(node *corev1.Nod return nil } -// getNetworkScopedName returns the network scoped name. -// Note: For default primary network, zic.GetPrefix() will return "" -// and for secondary networks it will return "_" -func (zic *ZoneInterconnectHandler) getNetworkScopedName(name string) string { - return fmt.Sprintf("%s%s", zic.GetPrefix(), name) -} - // interconnectStaticRoute represents a static route type interconnectStaticRoute struct { prefix string diff --git a/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler_test.go b/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler_test.go index 9d77499f56..15d7d7c88d 100644 --- a/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler_test.go +++ b/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler_test.go @@ -582,7 +582,8 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { err = createTransitSwitchPortBindings(libovsdbOvnSBClient, types.DefaultNetworkName, &testNode1, &testNode2, &testNode2) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - netInfo := util.NewNetInfo(&ovncnitypes.NetConf{NetConf: cnitypes.NetConf{Name: "blue"}, Topology: types.Layer3Topology}) + netInfo, err := util.NewNetInfo(&ovncnitypes.NetConf{NetConf: cnitypes.NetConf{Name: "blue"}, Topology: types.Layer3Topology}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) zoneICHandler := NewZoneInterconnectHandler(netInfo, libovsdbOvnNBClient, libovsdbOvnSBClient) err = invokeICHandlerAddNodeFunction("global", zoneICHandler, &testNode1, &testNode2, &testNode3) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -619,7 +620,8 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { err = createTransitSwitchPortBindings(libovsdbOvnSBClient, types.DefaultNetworkName, &testNode1, &testNode2, &testNode2) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - netInfo := util.NewNetInfo(&ovncnitypes.NetConf{NetConf: cnitypes.NetConf{Name: "blue"}, Topology: types.Layer3Topology}) + netInfo, err := util.NewNetInfo(&ovncnitypes.NetConf{NetConf: cnitypes.NetConf{Name: "blue"}, Topology: types.Layer3Topology}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) zoneICHandler := NewZoneInterconnectHandler(netInfo, 
libovsdbOvnNBClient, libovsdbOvnSBClient) err = invokeICHandlerAddNodeFunction("global", zoneICHandler, &testNode1, &testNode2, &testNode3) gomega.Expect(err).NotTo(gomega.HaveOccurred()) diff --git a/go-controller/pkg/util/multi_network.go b/go-controller/pkg/util/multi_network.go index 4dce48d633..83f9b37036 100644 --- a/go-controller/pkg/util/multi_network.go +++ b/go-controller/pkg/util/multi_network.go @@ -4,30 +4,45 @@ import ( "errors" "fmt" "net" - "sort" "strings" "sync" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + + kapi "k8s.io/api/core/v1" + knet "k8s.io/utils/net" + nettypes "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" ovncnitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" - - kapi "k8s.io/api/core/v1" - kerrors "k8s.io/apimachinery/pkg/util/errors" - "k8s.io/klog/v2" - utilnet "k8s.io/utils/net" ) var ErrorAttachDefNotOvnManaged = errors.New("net-attach-def not managed by OVN") -// NetInfo is interface which holds network name information -// for default network, this is set to nil -type NetInfo interface { +// BasicNetInfo is an interface which holds basic network information +type BasicNetInfo interface { + // basic network information GetNetworkName() string IsSecondary() bool - GetPrefix() string - GetNetworkScopedName(string) string + TopologyType() string + MTU() int + IPMode() (bool, bool) + Subnets() []config.CIDRNetworkEntry + ExcludeSubnets() []*net.IPNet + Vlan() uint + + // utility methods + CompareNetInfo(BasicNetInfo) bool + GetNetworkScopedName(name string) string + RemoveNetworkScopeFromName(name string) string +} + +// NetInfo correlates which NADs refer to a network in addition to the basic +// network information +type NetInfo interface { + BasicNetInfo AddNAD(nadName string) DeleteNAD(nadName string) HasNAD(nadName string) bool @@ -45,13 +60,15 @@ func (nInfo *DefaultNetInfo) IsSecondary() bool { return false } -// GetPrefix returns if the logical entities prefix for this network -func (nInfo *DefaultNetInfo) GetPrefix() string { - return "" +// GetNetworkScopedName returns a network scoped name from the provided one +// appropriate to use globally.
+func (nInfo *DefaultNetInfo) GetNetworkScopedName(name string) string { + // for the default network, names are not scoped + return name } -// GetNetworkScopedName returns network scope name in this network for the give name -func (nInfo *DefaultNetInfo) GetNetworkScopedName(name string) string { +func (nInfo *DefaultNetInfo) RemoveNetworkScopeFromName(name string) string { + // for the default network, names are not scoped return name } @@ -71,429 +88,262 @@ func (nInfo *DefaultNetInfo) HasNAD(nadName string) bool { panic("unexpected call for default network") } +func (nInfo *DefaultNetInfo) CompareNetInfo(netBasicInfo BasicNetInfo) bool { + _, ok := netBasicInfo.(*DefaultNetInfo) + return ok +} + +// TopologyType returns the defaultNetConfInfo's topology type which is empty +func (nInfo *DefaultNetInfo) TopologyType() string { + return "" +} + +// MTU returns the defaultNetConfInfo's MTU value +func (nInfo *DefaultNetInfo) MTU() int { + return config.Default.MTU +} + +// IPMode returns the defaultNetConfInfo's ipv4/ipv6 mode +func (nInfo *DefaultNetInfo) IPMode() (bool, bool) { + return config.IPv4Mode, config.IPv6Mode +} + +// Subnets returns the defaultNetConfInfo's Subnets value +func (nInfo *DefaultNetInfo) Subnets() []config.CIDRNetworkEntry { + return config.Default.ClusterSubnets +} + +// ExcludeSubnets returns the defaultNetConfInfo's ExcludeSubnets value +func (nInfo *DefaultNetInfo) ExcludeSubnets() []*net.IPNet { + return nil +} + +// Vlan returns the defaultNetConfInfo's Vlan value +func (nInfo *DefaultNetInfo) Vlan() uint { + return config.Gateway.VLANID +} + // SecondaryNetInfo holds the network name information for secondary network if non-nil -type SecondaryNetInfo struct { - // network name - netName string +type secondaryNetInfo struct { + netName string + topology string + mtu int + vlan uint + + ipv4mode, ipv6mode bool + subnets []config.CIDRNetworkEntry + excludeSubnets []*net.IPNet + // all net-attach-def NAD names for this network, used to determine if a pod needs // to be plumbed for this network - nadNames *sync.Map + nadNames sync.Map } // GetNetworkName returns the network name -func (nInfo *SecondaryNetInfo) GetNetworkName() string { +func (nInfo *secondaryNetInfo) GetNetworkName() string { return nInfo.netName } // IsSecondary returns if this network is secondary -func (nInfo *SecondaryNetInfo) IsSecondary() bool { +func (nInfo *secondaryNetInfo) IsSecondary() bool { return true } -// GetPrefix returns if the logical entities prefix for this network -func (nInfo *SecondaryNetInfo) GetPrefix() string { - return GetSecondaryNetworkPrefix(nInfo.netName) +// GetNetworkScopedName returns a network scoped name from the provided one +// appropriate to use globally. 
+func (nInfo *secondaryNetInfo) GetNetworkScopedName(name string) string { + return fmt.Sprintf("%s%s", nInfo.getPrefix(), name) +} + +// RemoveNetworkScopeFromName returns the name without the network scope added +// by a previous call to GetNetworkScopedName +func (nInfo *secondaryNetInfo) RemoveNetworkScopeFromName(name string) string { + // strip the prefix that GetNetworkScopedName prepends for this network + return strings.TrimPrefix(name, nInfo.getPrefix()) } -// GetNetworkScopedName returns network scope name in this network for the give name -func (nInfo *SecondaryNetInfo) GetNetworkScopedName(name string) string { - return fmt.Sprintf("%s%s", nInfo.GetPrefix(), name) +// getPrefix returns the logical entities prefix for this network +func (nInfo *secondaryNetInfo) getPrefix() string { + return GetSecondaryNetworkPrefix(nInfo.netName) } // AddNAD adds the specified NAD -func (nInfo *SecondaryNetInfo) AddNAD(nadName string) { +func (nInfo *secondaryNetInfo) AddNAD(nadName string) { nInfo.nadNames.Store(nadName, true) } // DeleteNAD deletes the specified NAD -func (nInfo *SecondaryNetInfo) DeleteNAD(nadName string) { +func (nInfo *secondaryNetInfo) DeleteNAD(nadName string) { nInfo.nadNames.Delete(nadName) } // HasNAD returns true if the given NAD exists, used // to check if the network needs to be plumbed over -func (nInfo *SecondaryNetInfo) HasNAD(nadName string) bool { +func (nInfo *secondaryNetInfo) HasNAD(nadName string) bool { _, ok := nInfo.nadNames.Load(nadName) return ok } -// NetConfInfo is structure which holds specific per-network configuration -type NetConfInfo interface { - CompareNetConf(NetConfInfo) bool - TopologyType() string - MTU() int - Subnets() []string - IPMode() (bool, bool) +// TopologyType returns the topology type +func (nInfo *secondaryNetInfo) TopologyType() string { + return nInfo.topology } -// DefaultNetConfInfo is structure which holds specific default network information -type DefaultNetConfInfo struct{} - -// CompareNetConf compares the defaultNetConfInfo with the given newNetConfInfo and returns true -// unless the given newNetConfInfo is not the type of DefaultNetConfInfo -func (defaultNetConfInfo *DefaultNetConfInfo) CompareNetConf(newNetConfInfo NetConfInfo) bool { - _, ok := newNetConfInfo.(*DefaultNetConfInfo) - if !ok { - klog.V(5).Infof("New netconf is different, expect default network netconf") - return false - } - return true +// MTU returns the MTU value +func (nInfo *secondaryNetInfo) MTU() int { + return nInfo.mtu } -// TopologyType returns the defaultNetConfInfo's topology type which is empty -func (defaultNetConfInfo *DefaultNetConfInfo) TopologyType() string { - return "" +// Vlan returns the Vlan value +func (nInfo *secondaryNetInfo) Vlan() uint { + return nInfo.vlan } -// MTU returns the defaultNetConfInfo's MTU value -func (defaultNetConfInfo *DefaultNetConfInfo) MTU() int { - return config.Default.MTU +// IPMode returns the ipv4/ipv6 mode +func (nInfo *secondaryNetInfo) IPMode() (bool, bool) { + return nInfo.ipv4mode, nInfo.ipv6mode } -// Subnets returns the defaultNetConfInfo's Subnets value -func (defaultNetConfInfo *DefaultNetConfInfo) Subnets() []string { - return []string{config.Default.RawClusterSubnets} +// Subnets returns the Subnets value +func (nInfo *secondaryNetInfo) Subnets() []config.CIDRNetworkEntry { + return nInfo.subnets } -// IPMode returns the defaultNetConfInfo's ipv4/ipv6 mode -func (defaultNetConfInfo *DefaultNetConfInfo) IPMode() (bool, bool) { - return config.IPv4Mode, config.IPv6Mode +// ExcludeSubnets
returns the ExcludeSubnets value +func (nInfo *secondaryNetInfo) ExcludeSubnets() []*net.IPNet { + return nInfo.excludeSubnets } -func isSubnetsStringEqual(subnetsString, newSubnetsString string) bool { - subnetsStringList := strings.Split(subnetsString, ",") - newSubnetsStringList := strings.Split(newSubnetsString, ",") - if len(subnetsStringList) != len(newSubnetsStringList) { +// CompareNetInfo compares for equality this network information with the other +func (nInfo *secondaryNetInfo) CompareNetInfo(other BasicNetInfo) bool { + if nInfo.netName != other.GetNetworkName() { return false } - for index := range subnetsStringList { - subnetsStringList[index] = strings.TrimSpace(subnetsStringList[index]) - } - for index := range newSubnetsStringList { - newSubnetsStringList[index] = strings.TrimSpace(newSubnetsStringList[index]) - } - sort.Strings(subnetsStringList) - sort.Strings(newSubnetsStringList) - for i, subnetString := range subnetsStringList { - if subnetString != newSubnetsStringList[i] { - return false - } - } - return true -} - -// parseSubnetsString parses comma-seperated subnet string and returns the list of subnets -func parseSubnetsString(clusterSubnetString string) ([]*net.IPNet, error) { - var subnetList []*net.IPNet - - if strings.TrimSpace(clusterSubnetString) == "" { - return subnetList, nil - } - - subnetStringList := strings.Split(clusterSubnetString, ",") - for _, subnetString := range subnetStringList { - subnetString = strings.TrimSpace(subnetString) - _, subnet, err := net.ParseCIDR(subnetString) - if err != nil { - return nil, err - } - - subnetList = append(subnetList, subnet) - } - return subnetList, nil -} - -// Layer3NetConfInfo is structure which holds specific secondary layer3 network information -type Layer3NetConfInfo struct { - subnets string - mtu int - ClusterSubnets []config.CIDRNetworkEntry -} - -// CompareNetConf compares the layer3NetConfInfo with the given newNetConfInfo and returns true -// if they share the same netconf information -func (layer3NetConfInfo *Layer3NetConfInfo) CompareNetConf(newNetConfInfo NetConfInfo) bool { - var errs []error - var err error - - newLayer3NetConfInfo, ok := newNetConfInfo.(*Layer3NetConfInfo) - if !ok { - klog.V(5).Infof("New netconf topology type is different, expect %s", - layer3NetConfInfo.TopologyType()) + if nInfo.topology != other.TopologyType() { return false } - - if !isSubnetsStringEqual(layer3NetConfInfo.subnets, newLayer3NetConfInfo.subnets) { - err = fmt.Errorf("new %s netconf subnets %v has changed, expect %v", - types.Layer3Topology, newLayer3NetConfInfo.subnets, layer3NetConfInfo.subnets) - errs = append(errs, err) - } - - if layer3NetConfInfo.mtu != newLayer3NetConfInfo.mtu { - err = fmt.Errorf("new %s netconf mtu %v has changed, expect %v", - types.Layer3Topology, newLayer3NetConfInfo.mtu, layer3NetConfInfo.mtu) - errs = append(errs, err) + if nInfo.mtu != other.MTU() { + return false } - if len(errs) != 0 { - err = kerrors.NewAggregate(errs) - klog.V(5).Infof(err.Error()) + if nInfo.vlan != other.Vlan() { return false } - return true -} -func newLayer3NetConfInfo(netconf *ovncnitypes.NetConf) (*Layer3NetConfInfo, error) { - clusterSubnets, err := config.ParseClusterSubnetEntries(netconf.Subnets) - if err != nil { - return nil, fmt.Errorf("cluster subnet %s is invalid: %v", netconf.Subnets, err) + lessCIDRNetworkEntry := func(a, b config.CIDRNetworkEntry) bool { return a.String() < b.String() } + if !cmp.Equal(nInfo.subnets, other.Subnets(), cmpopts.SortSlices(lessCIDRNetworkEntry)) { + return 
false } - return &Layer3NetConfInfo{ - subnets: netconf.Subnets, - mtu: netconf.MTU, - ClusterSubnets: clusterSubnets, - }, nil -} - -// TopologyType returns the layer3NetConfInfo's topology type which is layer3 topology -func (layer3NetConfInfo *Layer3NetConfInfo) TopologyType() string { - return types.Layer3Topology + lessIPNet := func(a, b net.IPNet) bool { return a.String() < b.String() } + return cmp.Equal(nInfo.excludeSubnets, other.ExcludeSubnets(), cmpopts.SortSlices(lessIPNet)) } -// MTU returns the layer3NetConfInfo's MTU value -func (layer3NetConfInfo *Layer3NetConfInfo) MTU() int { - return layer3NetConfInfo.mtu -} - -// Subnets returns the layer3NetConfInfo's Subnets value -func (layer3NetConfInfo *Layer3NetConfInfo) Subnets() []string { - return strings.Split(layer3NetConfInfo.subnets, ",") -} - -// IPMode returns the layer3NetConfInfo's ipv4/ipv6 mode -func (layer3NetConfInfo *Layer3NetConfInfo) IPMode() (bool, bool) { - var ipv6Mode, ipv4Mode bool - for _, cidr := range layer3NetConfInfo.ClusterSubnets { - if utilnet.IsIPv6CIDR(cidr.CIDR) { - ipv6Mode = true - } else { - ipv4Mode = true - } +func newLayer3NetConfInfo(netconf *ovncnitypes.NetConf) (NetInfo, error) { + subnets, _, err := parseSubnets(netconf.Subnets, "", types.Layer3Topology) + if err != nil { + return nil, err } - return ipv4Mode, ipv6Mode -} -// Layer2NetConfInfo is structure which holds specific secondary layer2 network information -type Layer2NetConfInfo struct { - subnets string - mtu int - excludeSubnets string - - ClusterSubnets []*net.IPNet - ExcludeSubnets []*net.IPNet -} - -// CompareNetConf compares the layer2NetConfInfo with the given newNetConfInfo and returns true -// if they share the same configuration -func (layer2NetConfInfo *Layer2NetConfInfo) CompareNetConf(newNetConfInfo NetConfInfo) bool { - var errs []error - var err error - newLayer2NetConfInfo, ok := newNetConfInfo.(*Layer2NetConfInfo) - if !ok { - klog.V(5).Infof("New netconf topology type is different, expect %s", - layer2NetConfInfo.TopologyType()) - return false - } - if !isSubnetsStringEqual(layer2NetConfInfo.subnets, newLayer2NetConfInfo.subnets) { - err = fmt.Errorf("new %s netconf subnets %v has changed, expect %v", - types.Layer2Topology, newLayer2NetConfInfo.subnets, layer2NetConfInfo.subnets) - errs = append(errs, err) + ni := &secondaryNetInfo{ + netName: netconf.Name, + topology: types.Layer3Topology, + subnets: subnets, + mtu: netconf.MTU, } - if layer2NetConfInfo.mtu != newLayer2NetConfInfo.mtu { - err = fmt.Errorf("new %s netconf mtu %v has changed, expect %v", - types.Layer2Topology, newLayer2NetConfInfo.mtu, layer2NetConfInfo.mtu) - errs = append(errs, err) - } - if !isSubnetsStringEqual(layer2NetConfInfo.excludeSubnets, newLayer2NetConfInfo.excludeSubnets) { - err = fmt.Errorf("new %s netconf excludeSubnets %v has changed, expect %v", - types.Layer2Topology, newLayer2NetConfInfo.excludeSubnets, layer2NetConfInfo.excludeSubnets) - errs = append(errs, err) - } - if len(errs) != 0 { - err = kerrors.NewAggregate(errs) - klog.V(5).Infof(err.Error()) - return false - } - return true + ni.ipv4mode, ni.ipv6mode = getIPMode(subnets) + return ni, nil } -func newLayer2NetConfInfo(netconf *ovncnitypes.NetConf) (*Layer2NetConfInfo, error) { - clusterSubnets, excludeSubnets, err := verifyExcludeIPs(netconf.Subnets, netconf.ExcludeSubnets) +func newLayer2NetConfInfo(netconf *ovncnitypes.NetConf) (NetInfo, error) { + subnets, excludes, err := parseSubnets(netconf.Subnets, netconf.ExcludeSubnets, types.Layer2Topology) if err != nil 
{ return nil, fmt.Errorf("invalid %s netconf %s: %v", netconf.Topology, netconf.Name, err) } - return &Layer2NetConfInfo{ - subnets: netconf.Subnets, + ni := &secondaryNetInfo{ + netName: netconf.Name, + topology: types.Layer2Topology, + subnets: subnets, + excludeSubnets: excludes, mtu: netconf.MTU, - excludeSubnets: netconf.ExcludeSubnets, - ClusterSubnets: clusterSubnets, - ExcludeSubnets: excludeSubnets, - }, nil -} - -func verifyExcludeIPs(subnetsString string, excludeSubnetsString string) ([]*net.IPNet, []*net.IPNet, error) { - clusterSubnets, err := parseSubnetsString(subnetsString) - if err != nil { - return nil, nil, fmt.Errorf("subnets %s is invalid: %v", subnetsString, err) } + ni.ipv4mode, ni.ipv6mode = getIPMode(subnets) + return ni, nil +} - excludeSubnets, err := parseSubnetsString(excludeSubnetsString) +func newLocalnetNetConfInfo(netconf *ovncnitypes.NetConf) (NetInfo, error) { + subnets, excludes, err := parseSubnets(netconf.Subnets, netconf.ExcludeSubnets, types.LocalnetTopology) if err != nil { - return nil, nil, fmt.Errorf("excludeSubnets %s is invalid: %v", excludeSubnetsString, err) - } - - for _, excludeSubnet := range excludeSubnets { - found := false - for _, subnet := range clusterSubnets { - if ContainsCIDR(subnet, excludeSubnet) { - found = true - break - } - } - if !found { - return nil, nil, fmt.Errorf("the provided network subnets %v does not contain exluded subnets %v", - clusterSubnets, excludeSubnet) - } + return nil, fmt.Errorf("invalid %s netconf %s: %v", netconf.Topology, netconf.Name, err) } - return clusterSubnets, excludeSubnets, nil -} - -// TopologyType returns layer2NetConfInfo's topology type -func (layer2NetConfInfo *Layer2NetConfInfo) TopologyType() string { - return types.Layer2Topology -} - -// MTU returns layer2NetConfInfo's MTU value -func (layer2NetConfInfo *Layer2NetConfInfo) MTU() int { - return layer2NetConfInfo.mtu -} - -// Subnets returns layer2NetConfInfo's subnets information -func (layer2NetConfInfo *Layer2NetConfInfo) Subnets() []string { - subnets := strings.Split(layer2NetConfInfo.subnets, ",") - if len(subnets) == 1 && strings.TrimSpace(subnets[0]) == "" { - return nil + ni := &secondaryNetInfo{ + netName: netconf.Name, + topology: types.LocalnetTopology, + subnets: subnets, + excludeSubnets: excludes, + mtu: netconf.MTU, + vlan: uint(netconf.VLANID), } - return subnets + ni.ipv4mode, ni.ipv6mode = getIPMode(subnets) + return ni, nil } -// IPMode returns the layer2NetConfInfo's ipv4/ipv6 mode -func (layer2NetConfInfo *Layer2NetConfInfo) IPMode() (bool, bool) { - var ipv6Mode, ipv4Mode bool - for _, subnet := range layer2NetConfInfo.ClusterSubnets { - if utilnet.IsIPv6CIDR(subnet) { - ipv6Mode = true - } else { - ipv4Mode = true +func parseSubnets(subnetsString, excludeSubnetsString, topology string) ([]config.CIDRNetworkEntry, []*net.IPNet, error) { + var parseSubnets func(clusterSubnetCmd string) ([]config.CIDRNetworkEntry, error) + switch topology { + case types.Layer3Topology: + // For L3 topology, subnet is validated + parseSubnets = config.ParseClusterSubnetEntries + case types.LocalnetTopology, types.Layer2Topology: + // For L2 topologies, host specific prefix length is ignored (using 0 as + // prefix length) + parseSubnets = func(clusterSubnetCmd string) ([]config.CIDRNetworkEntry, error) { + return config.ParseClusterSubnetEntriesWithDefaults(clusterSubnetCmd, 0, 0) + } } - return ipv4Mode, ipv6Mode -} - -// LocalnetNetConfInfo is structure which holds specific secondary localnet network information -type LocalnetNetConfInfo
struct { - subnets string - mtu int - excludeSubnets string - - VLANID int - ClusterSubnets []*net.IPNet - ExcludeSubnets []*net.IPNet -} -// CompareNetConf compares the localnetNetConfInfo with the given newNetConfInfo and returns true -// if they share the same configuration -func (localnetNetConfInfo *LocalnetNetConfInfo) CompareNetConf(newNetConfInfo NetConfInfo) bool { - var errs []error - var err error - - newLocalnetNetConfInfo, ok := newNetConfInfo.(*LocalnetNetConfInfo) - if !ok { - klog.V(5).Infof("New netconf topology type is different, expect %s", - localnetNetConfInfo.TopologyType()) - return false - } - if !isSubnetsStringEqual(localnetNetConfInfo.subnets, newLocalnetNetConfInfo.subnets) { - err = fmt.Errorf("new %s netconf subnets %v has changed, expect %v", - types.LocalnetTopology, newLocalnetNetConfInfo.subnets, localnetNetConfInfo.subnets) - errs = append(errs, err) - } - if localnetNetConfInfo.mtu != newLocalnetNetConfInfo.mtu { - err = fmt.Errorf("new %s netconf mtu %v has changed, expect %v", - types.LocalnetTopology, newLocalnetNetConfInfo.mtu, localnetNetConfInfo.mtu) - errs = append(errs, err) - } - if !isSubnetsStringEqual(localnetNetConfInfo.excludeSubnets, newLocalnetNetConfInfo.excludeSubnets) { - err = fmt.Errorf("new %s netconf excludeSubnets %v has changed, expect %v", - types.LocalnetTopology, newLocalnetNetConfInfo.excludeSubnets, localnetNetConfInfo.excludeSubnets) - errs = append(errs, err) - } - if localnetNetConfInfo.VLANID != newLocalnetNetConfInfo.VLANID { - err = fmt.Errorf("new %s netconf VLAN ID %v has changed, expect %v", - types.LocalnetTopology, newLocalnetNetConfInfo.VLANID, localnetNetConfInfo.VLANID) - errs = append(errs, err) + var subnets []config.CIDRNetworkEntry + if strings.TrimSpace(subnetsString) != "" { + var err error + subnets, err = parseSubnets(subnetsString) + if err != nil { + return nil, nil, err + } } - if len(errs) != 0 { - err = kerrors.NewAggregate(errs) - klog.V(5).Infof(err.Error()) - return false - } - return true -} - -func newLocalnetNetConfInfo(netconf *ovncnitypes.NetConf) (*LocalnetNetConfInfo, error) { - clusterSubnets, excludeSubnets, err := verifyExcludeIPs(netconf.Subnets, netconf.ExcludeSubnets) - if err != nil { - return nil, fmt.Errorf("invalid %s netconf %s: %v", netconf.Topology, netconf.Name, err) + var excludeIPNets []*net.IPNet + if strings.TrimSpace(excludeSubnetsString) != "" { + // For L2 topologies, host specific prefix length is ignored (using 0 as + // prefix length) + excludeSubnets, err := config.ParseClusterSubnetEntriesWithDefaults(excludeSubnetsString, 0, 0) + if err != nil { + return nil, nil, err + } + excludeIPNets = make([]*net.IPNet, 0, len(excludeSubnets)) + for _, excludeSubnet := range excludeSubnets { + found := false + for _, subnet := range subnets { + if ContainsCIDR(subnet.CIDR, excludeSubnet.CIDR) { + found = true + break + } + } + if !found { + return nil, nil, fmt.Errorf("the provided network subnets %v do not contain excluded subnets %v", + subnets, excludeSubnet.CIDR) + } + excludeIPNets = append(excludeIPNets, excludeSubnet.CIDR) + } } - return &LocalnetNetConfInfo{ - subnets: netconf.Subnets, - mtu: netconf.MTU, - VLANID: netconf.VLANID, - excludeSubnets: netconf.ExcludeSubnets, - ClusterSubnets: clusterSubnets, - ExcludeSubnets: excludeSubnets, - }, nil + return subnets, excludeIPNets, nil } -// TopologyType returns LocalnetNetConfInfo's topology type -func (localnetNetConfInfo *LocalnetNetConfInfo) TopologyType() string { - return types.LocalnetTopology -} - -// MTU
returns LocalnetNetConfInfo's MTU value -func (localnetNetConfInfo *LocalnetNetConfInfo) MTU() int { - return localnetNetConfInfo.mtu -} - -// Subnets returns localnetNetConfInfo's subnets information -func (localnetNetConfInfo *LocalnetNetConfInfo) Subnets() []string { - subnets := strings.Split(localnetNetConfInfo.subnets, ",") - if len(subnets) == 1 && strings.TrimSpace(subnets[0]) == "" { - return nil - } - return subnets -} - -// IPMode returns the localnetNetConfInfo's ipv4/ipv6 mode -func (localnetNetConfInfo *LocalnetNetConfInfo) IPMode() (bool, bool) { +func getIPMode(subnets []config.CIDRNetworkEntry) (bool, bool) { var ipv6Mode, ipv4Mode bool - for _, subnet := range localnetNetConfInfo.ClusterSubnets { - if utilnet.IsIPv6CIDR(subnet) { + for _, subnet := range subnets { + if knet.IsIPv6CIDR(subnet.CIDR) { ipv6Mode = true } else { ipv4Mode = true @@ -518,9 +368,9 @@ func GetSecondaryNetworkPrefix(netName string) string { return name + "_" } -func newNetConfInfo(netconf *ovncnitypes.NetConf) (NetConfInfo, error) { +func NewNetInfo(netconf *ovncnitypes.NetConf) (NetInfo, error) { if netconf.Name == types.DefaultNetworkName { - return &DefaultNetConfInfo{}, nil + return &DefaultNetInfo{}, nil } switch netconf.Topology { case types.Layer3Topology: @@ -536,31 +386,18 @@ func newNetConfInfo(netconf *ovncnitypes.NetConf) (NetConfInfo, error) { } // ParseNADInfo parses config in NAD spec and return a NetAttachDefInfo object for secondary networks -func ParseNADInfo(netattachdef *nettypes.NetworkAttachmentDefinition) (NetInfo, NetConfInfo, error) { +func ParseNADInfo(netattachdef *nettypes.NetworkAttachmentDefinition) (NetInfo, error) { netconf, err := ParseNetConf(netattachdef) if err != nil { - return nil, nil, err + return nil, err } - netconfInfo, err := newNetConfInfo(netconf) + ni, err := NewNetInfo(netconf) if err != nil { - return nil, nil, err + return nil, err } - return NewNetInfo(netconf), netconfInfo, nil -} -// ParseNetConf returns NetInfo for the given netconf -func NewNetInfo(netconf *ovncnitypes.NetConf) NetInfo { - var nInfo NetInfo - if netconf.Name == types.DefaultNetworkName { - nInfo = &DefaultNetInfo{} - } else { - nInfo = &SecondaryNetInfo{ - netName: netconf.Name, - nadNames: &sync.Map{}, - } - } - return nInfo + return ni, nil } // ParseNetConf parses config in NAD spec for secondary networks diff --git a/go-controller/pkg/util/multi_network_test.go b/go-controller/pkg/util/multi_network_test.go index d0fdb1fe91..404116ddef 100644 --- a/go-controller/pkg/util/multi_network_test.go +++ b/go-controller/pkg/util/multi_network_test.go @@ -1,84 +1,124 @@ package util import ( - "fmt" "net" "testing" - "github.com/stretchr/testify/assert" -) + "github.com/onsi/gomega" -func parseIPNets(ipNetStrs ...string) []*net.IPNet { - ipNets := make([]*net.IPNet, len(ipNetStrs)) - for i := range ipNetStrs { - _, ipNet, err := net.ParseCIDR(ipNetStrs[i]) - if err != nil { - panic(fmt.Sprintf("Could not parse %q as a CIDR: %v", ipNetStrs[i], err)) - } - ipNets[i] = ipNet - } - return ipNets -} + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" +) -func TestParseSubnetsString(t *testing.T) { +func TestParseSubnets(t *testing.T) { tests := []struct { - desc string - input string - expOutput []*net.IPNet + desc string + topology string + subnets string + excludes string + expectedSubnets []config.CIDRNetworkEntry + expectedExcludes 
[]*net.IPNet + expectError bool }{ { - desc: "positive, single IPv4 subnet", - input: "192.168.1.1/24", - expOutput: parseIPNets("192.168.1.1/24"), + desc: "multiple subnets layer 3 topology", + topology: types.Layer3Topology, + subnets: "192.168.1.1/26/28, fda6::/48", + expectedSubnets: []config.CIDRNetworkEntry{ + { + CIDR: ovntest.MustParseIPNet("192.168.1.0/26"), + HostSubnetLength: 28, + }, + { + CIDR: ovntest.MustParseIPNet("fda6::/48"), + HostSubnetLength: 64, + }, + }, }, { - desc: "positive, multiple IPv4 subnet", - input: "192.168.1.1/24, 192.168.2.1/24", - expOutput: parseIPNets("192.168.1.1/24", "192.168.2.1/24"), + desc: "empty subnets layer 3 topology", + topology: types.Layer3Topology, }, { - desc: "positive, empty string", - input: " ", - expOutput: []*net.IPNet{}, + desc: "multiple subnets and excludes layer 2 topology", + topology: types.Layer2Topology, + subnets: "192.168.1.1/26, fda6::/48", + excludes: "192.168.1.38/32, fda6::38/128", + expectedSubnets: []config.CIDRNetworkEntry{ + { + CIDR: ovntest.MustParseIPNet("192.168.1.0/26"), + }, + { + CIDR: ovntest.MustParseIPNet("fda6::/48"), + }, + }, + expectedExcludes: ovntest.MustParseIPNets("192.168.1.38/32", "fda6::38/128"), }, { - desc: "positive, single IPv6 subnet", - input: "2001:db8:3c4d::/48", - expOutput: parseIPNets("2001:db8:3c4d::/48"), + desc: "empty subnets layer 2 topology", + topology: types.Layer2Topology, }, { - desc: "positive, multiple IPv6 subnets", - input: "2001:db8:3c4d::/48, 2001:db8::1:0/64", - expOutput: parseIPNets("2001:db8:3c4d::/48", "2001:db8::0:0/64"), + desc: "invalid formatted excludes layer 2 topology", + topology: types.Layer2Topology, + subnets: "192.168.1.1/26", + excludes: "192.168.1.1/26/32", + expectError: true, }, { - desc: "negative, incorrect subnets case 1", - input: "192.168.1.1, 192.168.1.3/24", - expOutput: nil, + desc: "invalid not contained excludes layer 2 topology", + topology: types.Layer2Topology, + subnets: "fda6::/48", + excludes: "fda7::38/128", + expectError: true, }, { - desc: "negative, incorrect subnets case 2", - input: "abcde, 192.168.1.1/24", - expOutput: nil, + desc: "multiple subnets and excludes localnet topology", + topology: types.LocalnetTopology, + subnets: "192.168.1.1/26, fda6::/48", + excludes: "192.168.1.38/32, fda6::38/128", + expectedSubnets: []config.CIDRNetworkEntry{ + { + CIDR: ovntest.MustParseIPNet("192.168.1.0/26"), + }, + { + CIDR: ovntest.MustParseIPNet("fda6::/48"), + }, + }, + expectedExcludes: ovntest.MustParseIPNets("192.168.1.38/32", "fda6::38/128"), }, { - desc: "negative, incorrect subnets case 2", - input: "192.168.1.1/24; 192.168.1.2/24", - expOutput: nil, + desc: "empty subnets localnet topology", + topology: types.LocalnetTopology, + }, + { + desc: "invalid formatted excludes localnet topology", + topology: types.LocalnetTopology, + subnets: "fda6::/48", + excludes: "fda6::1/48/128", + expectError: true, + }, + { + desc: "invalid not contained excludes localnet topology", + topology: types.LocalnetTopology, + subnets: "192.168.1.1/26", + excludes: "192.168.2.38/32", + expectError: true, }, } - for i, tc := range tests { - t.Run(fmt.Sprintf("%d:%s", i, tc.desc), func(t *testing.T) { - resIPNets, err := parseSubnetsString(tc.input) - t.Log(resIPNets, err) - if tc.expOutput == nil { - assert.Error(t, err) - } else { - assert.Equal(t, len(resIPNets), len(tc.expOutput)) - for i := range resIPNets { - assert.Equal(t, *(resIPNets[i]), *(tc.expOutput[i])) - } + + for _, tc := range tests { + t.Run(tc.desc, func(t *testing.T) { + g := 
gomega.NewWithT(t) + subnets, excludes, err := parseSubnets(tc.subnets, tc.excludes, tc.topology) + if tc.expectError { + g.Expect(err).To(gomega.HaveOccurred()) + return } + g.Expect(err).NotTo(gomega.HaveOccurred()) + g.Expect(subnets).To(gomega.ConsistOf(tc.expectedSubnets)) + g.Expect(excludes).To(gomega.ConsistOf(tc.expectedExcludes)) }) } } diff --git a/go-controller/vendor/github.com/google/go-cmp/cmp/cmpopts/equate.go b/go-controller/vendor/github.com/google/go-cmp/cmp/cmpopts/equate.go new file mode 100644 index 0000000000..e54a76c7e3 --- /dev/null +++ b/go-controller/vendor/github.com/google/go-cmp/cmp/cmpopts/equate.go @@ -0,0 +1,156 @@ +// Copyright 2017, The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package cmpopts provides common options for the cmp package. +package cmpopts + +import ( + "errors" + "math" + "reflect" + "time" + + "github.com/google/go-cmp/cmp" +) + +func equateAlways(_, _ interface{}) bool { return true } + +// EquateEmpty returns a Comparer option that determines all maps and slices +// with a length of zero to be equal, regardless of whether they are nil. +// +// EquateEmpty can be used in conjunction with SortSlices and SortMaps. +func EquateEmpty() cmp.Option { + return cmp.FilterValues(isEmpty, cmp.Comparer(equateAlways)) +} + +func isEmpty(x, y interface{}) bool { + vx, vy := reflect.ValueOf(x), reflect.ValueOf(y) + return (x != nil && y != nil && vx.Type() == vy.Type()) && + (vx.Kind() == reflect.Slice || vx.Kind() == reflect.Map) && + (vx.Len() == 0 && vy.Len() == 0) +} + +// EquateApprox returns a Comparer option that determines float32 or float64 +// values to be equal if they are within a relative fraction or absolute margin. +// This option is not used when either x or y is NaN or infinite. +// +// The fraction determines that the difference of two values must be within the +// smaller fraction of the two values, while the margin determines that the two +// values must be within some absolute margin. +// To express only a fraction or only a margin, use 0 for the other parameter. +// The fraction and margin must be non-negative. +// +// The mathematical expression used is equivalent to: +// +// |x-y| ≤ max(fraction*min(|x|, |y|), margin) +// +// EquateApprox can be used in conjunction with EquateNaNs. +func EquateApprox(fraction, margin float64) cmp.Option { + if margin < 0 || fraction < 0 || math.IsNaN(margin) || math.IsNaN(fraction) { + panic("margin or fraction must be a non-negative number") + } + a := approximator{fraction, margin} + return cmp.Options{ + cmp.FilterValues(areRealF64s, cmp.Comparer(a.compareF64)), + cmp.FilterValues(areRealF32s, cmp.Comparer(a.compareF32)), + } +} + +type approximator struct{ frac, marg float64 } + +func areRealF64s(x, y float64) bool { + return !math.IsNaN(x) && !math.IsNaN(y) && !math.IsInf(x, 0) && !math.IsInf(y, 0) +} +func areRealF32s(x, y float32) bool { + return areRealF64s(float64(x), float64(y)) +} +func (a approximator) compareF64(x, y float64) bool { + relMarg := a.frac * math.Min(math.Abs(x), math.Abs(y)) + return math.Abs(x-y) <= math.Max(a.marg, relMarg) +} +func (a approximator) compareF32(x, y float32) bool { + return a.compareF64(float64(x), float64(y)) +} + +// EquateNaNs returns a Comparer option that determines float32 and float64 +// NaN values to be equal. +// +// EquateNaNs can be used in conjunction with EquateApprox. 
+func EquateNaNs() cmp.Option { + return cmp.Options{ + cmp.FilterValues(areNaNsF64s, cmp.Comparer(equateAlways)), + cmp.FilterValues(areNaNsF32s, cmp.Comparer(equateAlways)), + } +} + +func areNaNsF64s(x, y float64) bool { + return math.IsNaN(x) && math.IsNaN(y) +} +func areNaNsF32s(x, y float32) bool { + return areNaNsF64s(float64(x), float64(y)) +} + +// EquateApproxTime returns a Comparer option that determines two non-zero +// time.Time values to be equal if they are within some margin of one another. +// If both times have a monotonic clock reading, then the monotonic time +// difference will be used. The margin must be non-negative. +func EquateApproxTime(margin time.Duration) cmp.Option { + if margin < 0 { + panic("margin must be a non-negative number") + } + a := timeApproximator{margin} + return cmp.FilterValues(areNonZeroTimes, cmp.Comparer(a.compare)) +} + +func areNonZeroTimes(x, y time.Time) bool { + return !x.IsZero() && !y.IsZero() +} + +type timeApproximator struct { + margin time.Duration +} + +func (a timeApproximator) compare(x, y time.Time) bool { + // Avoid subtracting times to avoid overflow when the + // difference is larger than the largest representable duration. + if x.After(y) { + // Ensure x is always before y + x, y = y, x + } + // We're within the margin if x+margin >= y. + // Note: time.Time doesn't have AfterOrEqual method hence the negation. + return !x.Add(a.margin).Before(y) +} + +// AnyError is an error that matches any non-nil error. +var AnyError anyError + +type anyError struct{} + +func (anyError) Error() string { return "any error" } +func (anyError) Is(err error) bool { return err != nil } + +// EquateErrors returns a Comparer option that determines errors to be equal +// if errors.Is reports them to match. The AnyError error can be used to +// match any non-nil error. +func EquateErrors() cmp.Option { + return cmp.FilterValues(areConcreteErrors, cmp.Comparer(compareErrors)) +} + +// areConcreteErrors reports whether x and y are types that implement error. +// The input types are deliberately of the interface{} type rather than the +// error type so that we can handle situations where the current type is an +// interface{}, but the underlying concrete types both happen to implement +// the error interface. +func areConcreteErrors(x, y interface{}) bool { + _, ok1 := x.(error) + _, ok2 := y.(error) + return ok1 && ok2 +} + +func compareErrors(x, y interface{}) bool { + xe := x.(error) + ye := y.(error) + return errors.Is(xe, ye) || errors.Is(ye, xe) +} diff --git a/go-controller/vendor/github.com/google/go-cmp/cmp/cmpopts/ignore.go b/go-controller/vendor/github.com/google/go-cmp/cmp/cmpopts/ignore.go new file mode 100644 index 0000000000..80c60617e4 --- /dev/null +++ b/go-controller/vendor/github.com/google/go-cmp/cmp/cmpopts/ignore.go @@ -0,0 +1,206 @@ +// Copyright 2017, The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmpopts + +import ( + "fmt" + "reflect" + "unicode" + "unicode/utf8" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/internal/function" +) + +// IgnoreFields returns an Option that ignores fields of the +// given names on a single struct type. It respects the names of exported fields +// that are forwarded due to struct embedding. +// The struct type is specified by passing in a value of that type. 
+// +// The name may be a dot-delimited string (e.g., "Foo.Bar") to ignore a +// specific sub-field that is embedded or nested within the parent struct. +func IgnoreFields(typ interface{}, names ...string) cmp.Option { + sf := newStructFilter(typ, names...) + return cmp.FilterPath(sf.filter, cmp.Ignore()) +} + +// IgnoreTypes returns an Option that ignores all values assignable to +// certain types, which are specified by passing in a value of each type. +func IgnoreTypes(typs ...interface{}) cmp.Option { + tf := newTypeFilter(typs...) + return cmp.FilterPath(tf.filter, cmp.Ignore()) +} + +type typeFilter []reflect.Type + +func newTypeFilter(typs ...interface{}) (tf typeFilter) { + for _, typ := range typs { + t := reflect.TypeOf(typ) + if t == nil { + // This occurs if someone tries to pass in sync.Locker(nil) + panic("cannot determine type; consider using IgnoreInterfaces") + } + tf = append(tf, t) + } + return tf +} +func (tf typeFilter) filter(p cmp.Path) bool { + if len(p) < 1 { + return false + } + t := p.Last().Type() + for _, ti := range tf { + if t.AssignableTo(ti) { + return true + } + } + return false +} + +// IgnoreInterfaces returns an Option that ignores all values or references of +// values assignable to certain interface types. These interfaces are specified +// by passing in an anonymous struct with the interface types embedded in it. +// For example, to ignore sync.Locker, pass in struct{sync.Locker}{}. +func IgnoreInterfaces(ifaces interface{}) cmp.Option { + tf := newIfaceFilter(ifaces) + return cmp.FilterPath(tf.filter, cmp.Ignore()) +} + +type ifaceFilter []reflect.Type + +func newIfaceFilter(ifaces interface{}) (tf ifaceFilter) { + t := reflect.TypeOf(ifaces) + if ifaces == nil || t.Name() != "" || t.Kind() != reflect.Struct { + panic("input must be an anonymous struct") + } + for i := 0; i < t.NumField(); i++ { + fi := t.Field(i) + switch { + case !fi.Anonymous: + panic("struct cannot have named fields") + case fi.Type.Kind() != reflect.Interface: + panic("embedded field must be an interface type") + case fi.Type.NumMethod() == 0: + // This matches everything; why would you ever want this? + panic("cannot ignore empty interface") + default: + tf = append(tf, fi.Type) + } + } + return tf +} +func (tf ifaceFilter) filter(p cmp.Path) bool { + if len(p) < 1 { + return false + } + t := p.Last().Type() + for _, ti := range tf { + if t.AssignableTo(ti) { + return true + } + if t.Kind() != reflect.Ptr && reflect.PtrTo(t).AssignableTo(ti) { + return true + } + } + return false +} + +// IgnoreUnexported returns an Option that only ignores the immediate unexported +// fields of a struct, including anonymous fields of unexported types. +// In particular, unexported fields within the struct's exported fields +// of struct types, including anonymous fields, will not be ignored unless the +// type of the field itself is also passed to IgnoreUnexported. +// +// Avoid ignoring unexported fields of a type which you do not control (i.e. a +// type from another repository), as changes to the implementation of such types +// may change how the comparison behaves. Prefer a custom Comparer instead. +func IgnoreUnexported(typs ...interface{}) cmp.Option { + ux := newUnexportedFilter(typs...) 
+ return cmp.FilterPath(ux.filter, cmp.Ignore()) +} + +type unexportedFilter struct{ m map[reflect.Type]bool } + +func newUnexportedFilter(typs ...interface{}) unexportedFilter { + ux := unexportedFilter{m: make(map[reflect.Type]bool)} + for _, typ := range typs { + t := reflect.TypeOf(typ) + if t == nil || t.Kind() != reflect.Struct { + panic(fmt.Sprintf("%T must be a non-pointer struct", typ)) + } + ux.m[t] = true + } + return ux +} +func (xf unexportedFilter) filter(p cmp.Path) bool { + sf, ok := p.Index(-1).(cmp.StructField) + if !ok { + return false + } + return xf.m[p.Index(-2).Type()] && !isExported(sf.Name()) +} + +// isExported reports whether the identifier is exported. +func isExported(id string) bool { + r, _ := utf8.DecodeRuneInString(id) + return unicode.IsUpper(r) +} + +// IgnoreSliceElements returns an Option that ignores elements of []V. +// The discard function must be of the form "func(T) bool" which is used to +// ignore slice elements of type V, where V is assignable to T. +// Elements are ignored if the function reports true. +func IgnoreSliceElements(discardFunc interface{}) cmp.Option { + vf := reflect.ValueOf(discardFunc) + if !function.IsType(vf.Type(), function.ValuePredicate) || vf.IsNil() { + panic(fmt.Sprintf("invalid discard function: %T", discardFunc)) + } + return cmp.FilterPath(func(p cmp.Path) bool { + si, ok := p.Index(-1).(cmp.SliceIndex) + if !ok { + return false + } + if !si.Type().AssignableTo(vf.Type().In(0)) { + return false + } + vx, vy := si.Values() + if vx.IsValid() && vf.Call([]reflect.Value{vx})[0].Bool() { + return true + } + if vy.IsValid() && vf.Call([]reflect.Value{vy})[0].Bool() { + return true + } + return false + }, cmp.Ignore()) +} + +// IgnoreMapEntries returns an Option that ignores entries of map[K]V. +// The discard function must be of the form "func(T, R) bool" which is used to +// ignore map entries of type K and V, where K and V are assignable to T and R. +// Entries are ignored if the function reports true. +func IgnoreMapEntries(discardFunc interface{}) cmp.Option { + vf := reflect.ValueOf(discardFunc) + if !function.IsType(vf.Type(), function.KeyValuePredicate) || vf.IsNil() { + panic(fmt.Sprintf("invalid discard function: %T", discardFunc)) + } + return cmp.FilterPath(func(p cmp.Path) bool { + mi, ok := p.Index(-1).(cmp.MapIndex) + if !ok { + return false + } + if !mi.Key().Type().AssignableTo(vf.Type().In(0)) || !mi.Type().AssignableTo(vf.Type().In(1)) { + return false + } + k := mi.Key() + vx, vy := mi.Values() + if vx.IsValid() && vf.Call([]reflect.Value{k, vx})[0].Bool() { + return true + } + if vy.IsValid() && vf.Call([]reflect.Value{k, vy})[0].Bool() { + return true + } + return false + }, cmp.Ignore()) +} diff --git a/go-controller/vendor/github.com/google/go-cmp/cmp/cmpopts/sort.go b/go-controller/vendor/github.com/google/go-cmp/cmp/cmpopts/sort.go new file mode 100644 index 0000000000..0eb2a758c2 --- /dev/null +++ b/go-controller/vendor/github.com/google/go-cmp/cmp/cmpopts/sort.go @@ -0,0 +1,147 @@ +// Copyright 2017, The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmpopts + +import ( + "fmt" + "reflect" + "sort" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/internal/function" +) + +// SortSlices returns a Transformer option that sorts all []V. +// The less function must be of the form "func(T, T) bool" which is used to +// sort any slice with element type V that is assignable to T. 
+// +// The less function must be: +// - Deterministic: less(x, y) == less(x, y) +// - Irreflexive: !less(x, x) +// - Transitive: if !less(x, y) and !less(y, z), then !less(x, z) +// +// The less function does not have to be "total". That is, if !less(x, y) and +// !less(y, x) for two elements x and y, their relative order is maintained. +// +// SortSlices can be used in conjunction with EquateEmpty. +func SortSlices(lessFunc interface{}) cmp.Option { + vf := reflect.ValueOf(lessFunc) + if !function.IsType(vf.Type(), function.Less) || vf.IsNil() { + panic(fmt.Sprintf("invalid less function: %T", lessFunc)) + } + ss := sliceSorter{vf.Type().In(0), vf} + return cmp.FilterValues(ss.filter, cmp.Transformer("cmpopts.SortSlices", ss.sort)) +} + +type sliceSorter struct { + in reflect.Type // T + fnc reflect.Value // func(T, T) bool +} + +func (ss sliceSorter) filter(x, y interface{}) bool { + vx, vy := reflect.ValueOf(x), reflect.ValueOf(y) + if !(x != nil && y != nil && vx.Type() == vy.Type()) || + !(vx.Kind() == reflect.Slice && vx.Type().Elem().AssignableTo(ss.in)) || + (vx.Len() <= 1 && vy.Len() <= 1) { + return false + } + // Check whether the slices are already sorted to avoid an infinite + // recursion cycle applying the same transform to itself. + ok1 := sort.SliceIsSorted(x, func(i, j int) bool { return ss.less(vx, i, j) }) + ok2 := sort.SliceIsSorted(y, func(i, j int) bool { return ss.less(vy, i, j) }) + return !ok1 || !ok2 +} +func (ss sliceSorter) sort(x interface{}) interface{} { + src := reflect.ValueOf(x) + dst := reflect.MakeSlice(src.Type(), src.Len(), src.Len()) + for i := 0; i < src.Len(); i++ { + dst.Index(i).Set(src.Index(i)) + } + sort.SliceStable(dst.Interface(), func(i, j int) bool { return ss.less(dst, i, j) }) + ss.checkSort(dst) + return dst.Interface() +} +func (ss sliceSorter) checkSort(v reflect.Value) { + start := -1 // Start of a sequence of equal elements. + for i := 1; i < v.Len(); i++ { + if ss.less(v, i-1, i) { + // Check that first and last elements in v[start:i] are equal. + if start >= 0 && (ss.less(v, start, i-1) || ss.less(v, i-1, start)) { + panic(fmt.Sprintf("incomparable values detected: want equal elements: %v", v.Slice(start, i))) + } + start = -1 + } else if start == -1 { + start = i + } + } +} +func (ss sliceSorter) less(v reflect.Value, i, j int) bool { + vx, vy := v.Index(i), v.Index(j) + return ss.fnc.Call([]reflect.Value{vx, vy})[0].Bool() +} + +// SortMaps returns a Transformer option that flattens map[K]V types to be a +// sorted []struct{K, V}. The less function must be of the form +// "func(T, T) bool" which is used to sort any map with key K that is +// assignable to T. +// +// Flattening the map into a slice has the property that cmp.Equal is able to +// use Comparers on K or the K.Equal method if it exists. +// +// The less function must be: +// - Deterministic: less(x, y) == less(x, y) +// - Irreflexive: !less(x, x) +// - Transitive: if !less(x, y) and !less(y, z), then !less(x, z) +// - Total: if x != y, then either less(x, y) or less(y, x) +// +// SortMaps can be used in conjunction with EquateEmpty. 
+func SortMaps(lessFunc interface{}) cmp.Option { + vf := reflect.ValueOf(lessFunc) + if !function.IsType(vf.Type(), function.Less) || vf.IsNil() { + panic(fmt.Sprintf("invalid less function: %T", lessFunc)) + } + ms := mapSorter{vf.Type().In(0), vf} + return cmp.FilterValues(ms.filter, cmp.Transformer("cmpopts.SortMaps", ms.sort)) +} + +type mapSorter struct { + in reflect.Type // T + fnc reflect.Value // func(T, T) bool +} + +func (ms mapSorter) filter(x, y interface{}) bool { + vx, vy := reflect.ValueOf(x), reflect.ValueOf(y) + return (x != nil && y != nil && vx.Type() == vy.Type()) && + (vx.Kind() == reflect.Map && vx.Type().Key().AssignableTo(ms.in)) && + (vx.Len() != 0 || vy.Len() != 0) +} +func (ms mapSorter) sort(x interface{}) interface{} { + src := reflect.ValueOf(x) + outType := reflect.StructOf([]reflect.StructField{ + {Name: "K", Type: src.Type().Key()}, + {Name: "V", Type: src.Type().Elem()}, + }) + dst := reflect.MakeSlice(reflect.SliceOf(outType), src.Len(), src.Len()) + for i, k := range src.MapKeys() { + v := reflect.New(outType).Elem() + v.Field(0).Set(k) + v.Field(1).Set(src.MapIndex(k)) + dst.Index(i).Set(v) + } + sort.Slice(dst.Interface(), func(i, j int) bool { return ms.less(dst, i, j) }) + ms.checkSort(dst) + return dst.Interface() +} +func (ms mapSorter) checkSort(v reflect.Value) { + for i := 1; i < v.Len(); i++ { + if !ms.less(v, i-1, i) { + panic(fmt.Sprintf("partial order detected: want %v < %v", v.Index(i-1), v.Index(i))) + } + } +} +func (ms mapSorter) less(v reflect.Value, i, j int) bool { + vx, vy := v.Index(i).Field(0), v.Index(j).Field(0) + return ms.fnc.Call([]reflect.Value{vx, vy})[0].Bool() +} diff --git a/go-controller/vendor/github.com/google/go-cmp/cmp/cmpopts/struct_filter.go b/go-controller/vendor/github.com/google/go-cmp/cmp/cmpopts/struct_filter.go new file mode 100644 index 0000000000..ca11a40249 --- /dev/null +++ b/go-controller/vendor/github.com/google/go-cmp/cmp/cmpopts/struct_filter.go @@ -0,0 +1,189 @@ +// Copyright 2017, The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmpopts + +import ( + "fmt" + "reflect" + "strings" + + "github.com/google/go-cmp/cmp" +) + +// filterField returns a new Option where opt is only evaluated on paths that +// include a specific exported field on a single struct type. +// The struct type is specified by passing in a value of that type. +// +// The name may be a dot-delimited string (e.g., "Foo.Bar") to select a +// specific sub-field that is embedded or nested within the parent struct. +func filterField(typ interface{}, name string, opt cmp.Option) cmp.Option { + // TODO: This is currently unexported over concerns of how helper filters + // can be composed together easily. + // TODO: Add tests for FilterField. + + sf := newStructFilter(typ, name) + return cmp.FilterPath(sf.filter, opt) +} + +type structFilter struct { + t reflect.Type // The root struct type to match on + ft fieldTree // Tree of fields to match on +} + +func newStructFilter(typ interface{}, names ...string) structFilter { + // TODO: Perhaps allow * as a special identifier to allow ignoring any + // number of path steps until the next field match? + // This could be useful when a concrete struct gets transformed into + // an anonymous struct where it is not possible to specify that by type, + // but the transformer happens to provide guarantees about the names of + // the transformed fields. 
+ + t := reflect.TypeOf(typ) + if t == nil || t.Kind() != reflect.Struct { + panic(fmt.Sprintf("%T must be a non-pointer struct", typ)) + } + var ft fieldTree + for _, name := range names { + cname, err := canonicalName(t, name) + if err != nil { + panic(fmt.Sprintf("%s: %v", strings.Join(cname, "."), err)) + } + ft.insert(cname) + } + return structFilter{t, ft} +} + +func (sf structFilter) filter(p cmp.Path) bool { + for i, ps := range p { + if ps.Type().AssignableTo(sf.t) && sf.ft.matchPrefix(p[i+1:]) { + return true + } + } + return false +} + +// fieldTree represents a set of dot-separated identifiers. +// +// For example, inserting the following selectors: +// +// Foo +// Foo.Bar.Baz +// Foo.Buzz +// Nuka.Cola.Quantum +// +// Results in a tree of the form: +// +// {sub: { +// "Foo": {ok: true, sub: { +// "Bar": {sub: { +// "Baz": {ok: true}, +// }}, +// "Buzz": {ok: true}, +// }}, +// "Nuka": {sub: { +// "Cola": {sub: { +// "Quantum": {ok: true}, +// }}, +// }}, +// }} +type fieldTree struct { + ok bool // Whether this is a specified node + sub map[string]fieldTree // The sub-tree of fields under this node +} + +// insert inserts a sequence of field accesses into the tree. +func (ft *fieldTree) insert(cname []string) { + if ft.sub == nil { + ft.sub = make(map[string]fieldTree) + } + if len(cname) == 0 { + ft.ok = true + return + } + sub := ft.sub[cname[0]] + sub.insert(cname[1:]) + ft.sub[cname[0]] = sub +} + +// matchPrefix reports whether any selector in the fieldTree matches +// the start of path p. +func (ft fieldTree) matchPrefix(p cmp.Path) bool { + for _, ps := range p { + switch ps := ps.(type) { + case cmp.StructField: + ft = ft.sub[ps.Name()] + if ft.ok { + return true + } + if len(ft.sub) == 0 { + return false + } + case cmp.Indirect: + default: + return false + } + } + return false +} + +// canonicalName returns a list of identifiers where any struct field access +// through an embedded field is expanded to include the names of the embedded +// types themselves. +// +// For example, suppose field "Foo" is not directly in the parent struct, +// but actually from an embedded struct of type "Bar". Then, the canonical name +// of "Foo" is actually "Bar.Foo". +// +// Suppose field "Foo" is not directly in the parent struct, but actually +// a field in two different embedded structs of types "Bar" and "Baz". +// Then the selector "Foo" causes a panic since it is ambiguous which one it +// refers to. The user must specify either "Bar.Foo" or "Baz.Foo". +func canonicalName(t reflect.Type, sel string) ([]string, error) { + var name string + sel = strings.TrimPrefix(sel, ".") + if sel == "" { + return nil, fmt.Errorf("name must not be empty") + } + if i := strings.IndexByte(sel, '.'); i < 0 { + name, sel = sel, "" + } else { + name, sel = sel[:i], sel[i:] + } + + // Type must be a struct or pointer to struct. + if t.Kind() == reflect.Ptr { + t = t.Elem() + } + if t.Kind() != reflect.Struct { + return nil, fmt.Errorf("%v must be a struct", t) + } + + // Find the canonical name for this current field name. + // If the field exists in an embedded struct, then it will be expanded. + sf, _ := t.FieldByName(name) + if !isExported(name) { + // Avoid using reflect.Type.FieldByName for unexported fields due to + // buggy behavior with regard to embeddeding and unexported fields. + // See https://golang.org/issue/4876 for details. 
+ sf = reflect.StructField{} + for i := 0; i < t.NumField() && sf.Name == ""; i++ { + if t.Field(i).Name == name { + sf = t.Field(i) + } + } + } + if sf.Name == "" { + return []string{name}, fmt.Errorf("does not exist") + } + var ss []string + for i := range sf.Index { + ss = append(ss, t.FieldByIndex(sf.Index[:i+1]).Name) + } + if sel == "" { + return ss, nil + } + ssPost, err := canonicalName(sf.Type, sel) + return append(ss, ssPost...), err +} diff --git a/go-controller/vendor/github.com/google/go-cmp/cmp/cmpopts/xform.go b/go-controller/vendor/github.com/google/go-cmp/cmp/cmpopts/xform.go new file mode 100644 index 0000000000..8812443a2f --- /dev/null +++ b/go-controller/vendor/github.com/google/go-cmp/cmp/cmpopts/xform.go @@ -0,0 +1,36 @@ +// Copyright 2018, The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmpopts + +import ( + "github.com/google/go-cmp/cmp" +) + +type xformFilter struct{ xform cmp.Option } + +func (xf xformFilter) filter(p cmp.Path) bool { + for _, ps := range p { + if t, ok := ps.(cmp.Transform); ok && t.Option() == xf.xform { + return false + } + } + return true +} + +// AcyclicTransformer returns a Transformer with a filter applied that ensures +// that the transformer cannot be recursively applied upon its own output. +// +// An example use case is a transformer that splits a string by lines: +// +// AcyclicTransformer("SplitLines", func(s string) []string{ +// return strings.Split(s, "\n") +// }) +// +// Had this been an unfiltered Transformer instead, this would result in an +// infinite cycle converting a string to []string to [][]string and so on. +func AcyclicTransformer(name string, xformFunc interface{}) cmp.Option { + xf := xformFilter{cmp.Transformer(name, xformFunc)} + return cmp.FilterPath(xf.filter, xf.xform) +} diff --git a/go-controller/vendor/modules.txt b/go-controller/vendor/modules.txt index c1936aefa0..9b8c7e6945 100644 --- a/go-controller/vendor/modules.txt +++ b/go-controller/vendor/modules.txt @@ -136,6 +136,7 @@ github.com/google/gnostic/openapiv3 # github.com/google/go-cmp v0.5.9 ## explicit; go 1.13 github.com/google/go-cmp/cmp +github.com/google/go-cmp/cmp/cmpopts github.com/google/go-cmp/cmp/internal/diff github.com/google/go-cmp/cmp/internal/flags github.com/google/go-cmp/cmp/internal/function From 640f10bf69a2cf53fdcf1c3c6315b8d52d54ff63 Mon Sep 17 00:00:00 2001 From: William Zhao Date: Thu, 4 May 2023 10:44:49 -0400 Subject: [PATCH 39/90] Clean up the mgmt port on OvS bridge for the DPU-Host (Tenant Cluster) It is possible to run OvS on the DPU-Host with br-int. Therefore, we should properly use the "syncMgmtPortInterface()" function. Even with OvS not running on the DPU-Host, this function will properly run "unconfigureMgmtNetdevicePort()". Signed-off-by: William Zhao --- go-controller/pkg/node/management-port-dpu.go | 7 +------ go-controller/pkg/node/management-port_linux_test.go | 5 +++++ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/go-controller/pkg/node/management-port-dpu.go b/go-controller/pkg/node/management-port-dpu.go index 638e1e60db..f623953eaa 100644 --- a/go-controller/pkg/node/management-port-dpu.go +++ b/go-controller/pkg/node/management-port-dpu.go @@ -194,12 +194,7 @@ func (mp *managementPortNetdev) Create(routeManager *routeManager, nodeAnnotator return nil, fmt.Errorf("failed to get link device for %s. 
%v", mp.netdevName, err) + } + } else if mp.netdevName != types.K8sMgmtIntfName { - if config.OvnKubeNode.Mode == types.NodeModeDPUHost { - // We do not expect OVS running here so just check if no old mgmt port netdevice exists and unconfigure it - err = unconfigureMgmtNetdevicePort(mp.hostSubnets, types.K8sMgmtIntfName) - } else { - err = syncMgmtPortInterface(mp.hostSubnets, types.K8sMgmtIntfName, false) - } + err = syncMgmtPortInterface(mp.hostSubnets, types.K8sMgmtIntfName, false) if err != nil { return nil, fmt.Errorf("failed to sync management port: %v", err) } diff --git a/go-controller/pkg/node/management-port_linux_test.go b/go-controller/pkg/node/management-port_linux_test.go index 94b0021209..cdd2d64f4c 100644 --- a/go-controller/pkg/node/management-port_linux_test.go +++ b/go-controller/pkg/node/management-port_linux_test.go @@ -403,6 +403,11 @@ func testManagementPortDPUHost(ctx *cli.Context, fexec *ovntest.FakeExec, testNS mtu int = 1400 ) + // OVS cmd setup + fexec.AddFakeCmdsNoOutputNoError([]string{ + "ovs-vsctl --timeout=15 --no-headings --data bare --format csv --columns type,name find Interface name=" + mgtPort, + }) + for _, cfg := range configs { if cfg.family == netlink.FAMILY_V4 { fexec.AddFakeCmd(&ovntest.ExpectedCmd{ From 7423d2c4ca45826fab7e8b687ada813993c08abd Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Fri, 28 Apr 2023 15:30:47 +0200 Subject: [PATCH 40/90] Configure TemplateLB with all host addresses TemplateLB variables for node IP addresses have to support multiple node IP addresses, hence the variables are in the format of NODE_IPv4_0, NODE_IPv4_1, NODE_IPv4_2, ... Struct `NodeIPsTemplates` manages the template variables for multiple nodes that may each have a different number of IP addresses. Add unit and e2e tests on TemplateLBs with multiple IP addresses. Refs: https://github.com/ovn-org/ovn-kubernetes/pull/3328 Signed-off-by: Andrea Panattoni --- .../pkg/ovn/controller/services/lb_config.go | 97 ++++++------ .../services/services_controller.go | 69 +++++---- .../services/services_controller_test.go | 141 ++++++++++++++++-- .../controller/services/svc_template_var.go | 76 ++++++++-- .../services/svc_template_var_test.go | 79 ++++++++++ test/e2e/e2e.go | 55 ++++--- 6 files changed, 398 insertions(+), 119 deletions(-) create mode 100644 go-controller/pkg/ovn/controller/services/svc_template_var_test.go diff --git a/go-controller/pkg/ovn/controller/services/lb_config.go b/go-controller/pkg/ovn/controller/services/lb_config.go index eac6724c74..0ad3b78126 100644 --- a/go-controller/pkg/ovn/controller/services/lb_config.go +++ b/go-controller/pkg/ovn/controller/services/lb_config.go @@ -340,7 +340,7 @@ func buildClusterLBs(service *v1.Service, configs []lbConfig, nodeInfos []nodeIn // NodePort services with ETP=local or affinity timeout set still need // non-template per-node LBs.
func buildTemplateLBs(service *v1.Service, configs []lbConfig, nodes []nodeInfo, - nodeIPv4Template, nodeIPv6Template *Template) []LB { + nodeIPv4Templates, nodeIPv6Templates *NodeIPsTemplates) []LB { cbp := configsByProto(configs) eids := util.ExternalIDsForObject(service) @@ -433,57 +433,63 @@ func buildTemplateLBs(service *v1.Service, configs []lbConfig, nodes []nodeInfo, sharedV6Targets = joinHostsPort(config.eps.V6IPs, config.eps.Port) } - if switchV4TargetNeedsTemplate { - switchV4Rules = append(switchV4Rules, LBRule{ - Source: Addr{Template: nodeIPv4Template, Port: config.inport}, - Targets: []Addr{{Template: switchV4TemplateTarget}}, - }) - } else { - switchV4Rules = append(switchV4Rules, LBRule{ - Source: Addr{Template: nodeIPv4Template, Port: config.inport}, - Targets: sharedV4Targets, - }) - } + for _, nodeIPv4Template := range nodeIPv4Templates.AsTemplates() { - if switchV6TargetNeedsTemplate { - switchV6Rules = append(switchV6Rules, LBRule{ - Source: Addr{Template: nodeIPv6Template, Port: config.inport}, - Targets: []Addr{{Template: switchV6TemplateTarget}}, - }) - } else { - switchV6Rules = append(switchV6Rules, LBRule{ - Source: Addr{Template: nodeIPv6Template, Port: config.inport}, - Targets: sharedV6Targets, - }) - } + if switchV4TargetNeedsTemplate { + switchV4Rules = append(switchV4Rules, LBRule{ + Source: Addr{Template: nodeIPv4Template, Port: config.inport}, + Targets: []Addr{{Template: switchV4TemplateTarget}}, + }) + } else { + switchV4Rules = append(switchV4Rules, LBRule{ + Source: Addr{Template: nodeIPv4Template, Port: config.inport}, + Targets: sharedV4Targets, + }) + } - if routerV4TargetNeedsTemplate { - routerV4Rules = append(routerV4Rules, LBRule{ - Source: Addr{Template: nodeIPv4Template, Port: config.inport}, - Targets: []Addr{{Template: routerV4TemplateTarget}}, - }) - } else { - routerV4Rules = append(routerV4Rules, LBRule{ - Source: Addr{Template: nodeIPv4Template, Port: config.inport}, - Targets: sharedV4Targets, - }) + if routerV4TargetNeedsTemplate { + routerV4Rules = append(routerV4Rules, LBRule{ + Source: Addr{Template: nodeIPv4Template, Port: config.inport}, + Targets: []Addr{{Template: routerV4TemplateTarget}}, + }) + } else { + routerV4Rules = append(routerV4Rules, LBRule{ + Source: Addr{Template: nodeIPv4Template, Port: config.inport}, + Targets: sharedV4Targets, + }) + } } - if routerV6TargetNeedsTemplate { - routerV6Rules = append(routerV6Rules, LBRule{ - Source: Addr{Template: nodeIPv6Template, Port: config.inport}, - Targets: []Addr{{Template: routerV6TemplateTarget}}, - }) - } else { - routerV6Rules = append(routerV6Rules, LBRule{ - Source: Addr{Template: nodeIPv6Template, Port: config.inport}, - Targets: sharedV6Targets, - }) + for _, nodeIPv6Template := range nodeIPv6Templates.AsTemplates() { + + if switchV6TargetNeedsTemplate { + switchV6Rules = append(switchV6Rules, LBRule{ + Source: Addr{Template: nodeIPv6Template, Port: config.inport}, + Targets: []Addr{{Template: switchV6TemplateTarget}}, + }) + } else { + switchV6Rules = append(switchV6Rules, LBRule{ + Source: Addr{Template: nodeIPv6Template, Port: config.inport}, + Targets: sharedV6Targets, + }) + } + + if routerV6TargetNeedsTemplate { + routerV6Rules = append(routerV6Rules, LBRule{ + Source: Addr{Template: nodeIPv6Template, Port: config.inport}, + Targets: []Addr{{Template: routerV6TemplateTarget}}, + }) + } else { + routerV6Rules = append(routerV6Rules, LBRule{ + Source: Addr{Template: nodeIPv6Template, Port: config.inport}, + Targets: sharedV6Targets, + }) + } } } } - if 
nodeIPv4Template.len() > 0 { + if nodeIPv4Templates.Len() > 0 { if len(switchV4Rules) > 0 { out = append(out, LB{ Name: makeLBName(service, proto, "node_switch_template_IPv4"), @@ -507,7 +513,8 @@ func buildTemplateLBs(service *v1.Service, configs []lbConfig, nodes []nodeInfo, }) } } - if nodeIPv6Template.len() > 0 { + + if nodeIPv6Templates.Len() > 0 { if len(switchV6Rules) > 0 { out = append(out, LB{ Name: makeLBName(service, proto, "node_switch_template_IPv6"), diff --git a/go-controller/pkg/ovn/controller/services/services_controller.go b/go-controller/pkg/ovn/controller/services/services_controller.go index e1bb090955..da0f257d75 100644 --- a/go-controller/pkg/ovn/controller/services/services_controller.go +++ b/go-controller/pkg/ovn/controller/services/services_controller.go @@ -58,13 +58,13 @@ func NewController(client clientset.Interface, klog.V(4).Info("Creating event broadcaster") c := &Controller{ - client: client, - nbClient: nbClient, - queue: workqueue.NewNamedRateLimitingQueue(newRatelimiter(100), controllerName), - workerLoopPeriod: time.Second, - alreadyApplied: map[string][]LB{}, - nodeIPv4Template: makeTemplate(makeLBNodeIPTemplateName(v1.IPv4Protocol)), - nodeIPv6Template: makeTemplate(makeLBNodeIPTemplateName(v1.IPv6Protocol)), + client: client, + nbClient: nbClient, + queue: workqueue.NewNamedRateLimitingQueue(newRatelimiter(100), controllerName), + workerLoopPeriod: time.Second, + alreadyApplied: map[string][]LB{}, + nodeIPv4Templates: NewNodeIPsTemplates(v1.IPv4Protocol), + nodeIPv6Templates: NewNodeIPsTemplates(v1.IPv6Protocol), } // services @@ -158,10 +158,10 @@ type Controller struct { // chassis' node IP (v4 and v6). // Must be accessed only with the nodeInfo mutex taken. // These are written in RequestFullSync(). - nodeInfos []nodeInfo - nodeIPv4Template *Template - nodeIPv6Template *Template - nodeInfoRWLock sync.RWMutex + nodeInfos []nodeInfo + nodeIPv4Templates *NodeIPsTemplates + nodeIPv6Templates *NodeIPsTemplates + nodeInfoRWLock sync.RWMutex // alreadyApplied is a map of service key -> already applied configuration, so we can short-circuit // if a service's config hasn't changed @@ -405,7 +405,7 @@ func (c *Controller) syncService(key string) error { // Convert the LB configs in to load-balancer objects clusterLBs := buildClusterLBs(service, clusterConfigs, c.nodeInfos, c.useLBGroups) templateLBs := buildTemplateLBs(service, templateConfigs, c.nodeInfos, - c.nodeIPv4Template, c.nodeIPv6Template) + c.nodeIPv4Templates, c.nodeIPv6Templates) perNodeLBs := buildPerNodeLBs(service, perNodeConfigs, c.nodeInfos) klog.V(5).Infof("Built service %s cluster-wide LB %#v", key, clusterLBs) klog.V(5).Infof("Built service %s per-node LB %#v", key, perNodeLBs) @@ -457,38 +457,45 @@ func (c *Controller) syncNodeInfos(nodeInfos []nodeInfo) { } // Compute the nodeIP template values. - c.nodeIPv4Template = makeTemplate(makeLBNodeIPTemplateName(v1.IPv4Protocol)) - c.nodeIPv6Template = makeTemplate(makeLBNodeIPTemplateName(v1.IPv6Protocol)) + c.nodeIPv4Templates = NewNodeIPsTemplates(v1.IPv4Protocol) + c.nodeIPv6Templates = NewNodeIPsTemplates(v1.IPv6Protocol) - for _, node := range c.nodeInfos { - if node.chassisID == "" { + for _, nodeInfo := range c.nodeInfos { + if nodeInfo.chassisID == "" { continue } - // Services are currently supported only on the node's first IP. - // Extract that one and populate the node's IP template value. 
+ if globalconfig.IPv4Mode { - if ipv4, err := util.MatchFirstIPFamily(false, node.l3gatewayAddresses); err == nil { - c.nodeIPv4Template.Value[node.chassisID] = ipv4.String() + ips, err := util.MatchIPFamily(false, nodeInfo.hostAddresses) + if err != nil { + klog.Warningf("Error while searching for IPv4 host addresses in %v for node[%s] : %v", + nodeInfo.hostAddresses, nodeInfo.name, err) + continue + } + + for _, ip := range ips { + c.nodeIPv4Templates.AddIP(nodeInfo.chassisID, ip) } } + if globalconfig.IPv6Mode { - if ipv6, err := util.MatchFirstIPFamily(true, node.l3gatewayAddresses); err == nil { - c.nodeIPv6Template.Value[node.chassisID] = ipv6.String() + ips, err := util.MatchIPFamily(true, nodeInfo.hostAddresses) + if err != nil { + klog.Warningf("Error while searching for IPv6 host addresses in %v for node[%s] : %v", + nodeInfo.hostAddresses, nodeInfo.name, err) + continue + } + + for _, ip := range ips { + c.nodeIPv6Templates.AddIP(nodeInfo.chassisID, ip) } } } // Sync the nodeIP template values to the DB. - nodeIPTemplateMap := TemplateMap{} - if c.nodeIPv4Template.len() > 0 { - nodeIPTemplateMap[c.nodeIPv4Template.Name] = c.nodeIPv4Template - } - if c.nodeIPv6Template.len() > 0 { - nodeIPTemplateMap[c.nodeIPv6Template.Name] = c.nodeIPv6Template - } - nodeIPTemplates := []TemplateMap{ - nodeIPTemplateMap, + c.nodeIPv4Templates.AsTemplateMap(), + c.nodeIPv6Templates.AsTemplateMap(), } if err := svcCreateOrUpdateTemplateVar(c.nbClient, nodeIPTemplates); err != nil { klog.Errorf("Could not sync node IP templates") diff --git a/go-controller/pkg/ovn/controller/services/services_controller_test.go b/go-controller/pkg/ovn/controller/services/services_controller_test.go index 485aa40a71..b8c7d5a1e1 100644 --- a/go-controller/pkg/ovn/controller/services/services_controller_test.go +++ b/go-controller/pkg/ovn/controller/services/services_controller_test.go @@ -30,6 +30,11 @@ import ( var alwaysReady = func() bool { return true } var FakeGRs = "GR_1 GR_2" +var initialLsGroups []string = []string{types.ClusterLBGroupName, types.ClusterSwitchLBGroupName} +var initialLrGroups []string = []string{types.ClusterLBGroupName, types.ClusterRouterLBGroupName} + +var outport int32 = int32(3456) +var tcp v1.Protocol = v1.ProtocolTCP type serviceController struct { *Controller @@ -108,8 +113,6 @@ func TestSyncServices(t *testing.T) { ns := "testns" serviceName := "foo" - initialLsGroups := []string{types.ClusterLBGroupName, types.ClusterSwitchLBGroupName} - initialLrGroups := []string{types.ClusterLBGroupName, types.ClusterRouterLBGroupName} oldGateway := globalconfig.Gateway.Mode oldClusterSubnet := globalconfig.Default.ClusterSubnets @@ -125,9 +128,6 @@ func TestSyncServices(t *testing.T) { _, cidr6, _ := net.ParseCIDR("fe00::/64") globalconfig.Default.ClusterSubnets = []globalconfig.CIDRNetworkEntry{{cidr4, 26}, {cidr6, 26}} - outport := int32(3456) - tcp := v1.ProtocolTCP - const ( nodeA = "node-a" nodeB = "node-b" @@ -510,6 +510,120 @@ func TestSyncServices(t *testing.T) { } } +func Test_ETPCluster_NodePort_Service_WithMultipleIPAddresses(t *testing.T) { + g := gomega.NewGomegaWithT(t) + globalconfig.IPv4Mode = true + + nodeA := nodeInfo{ + name: "node-a", + l3gatewayAddresses: []net.IP{net.ParseIP("10.1.1.1")}, + hostAddresses: []net.IP{ + net.ParseIP("10.1.1.1"), + net.ParseIP("10.2.2.2"), + net.ParseIP("10.3.3.3")}, + gatewayRouterName: nodeGWRouterName("node-a"), + switchName: nodeSwitchName("node-a"), + chassisID: "node-a", + zone: types.OvnDefaultZone, + } + + svc := &v1.Service{ + 
ObjectMeta: metav1.ObjectMeta{Name: "svc-foo", Namespace: "namespace1"}, + Spec: v1.ServiceSpec{ + Type: v1.ServiceTypeNodePort, + ClusterIP: "192.168.1.1", + ClusterIPs: []string{"192.168.1.1"}, + Selector: map[string]string{"foo": "bar"}, + ExternalTrafficPolicy: v1.ServiceExternalTrafficPolicyTypeCluster, + Ports: []v1.ServicePort{{ + Port: 80, + Protocol: v1.ProtocolTCP, + TargetPort: intstr.FromInt(3456), + NodePort: 30123, + }}, + }, + } + + endPointSlice := &discovery.EndpointSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: svc.Name + "ab23", + Namespace: svc.Namespace, + Labels: map[string]string{discovery.LabelServiceName: svc.Name}, + }, + Ports: []discovery.EndpointPort{ + { + Protocol: &tcp, + Port: &outport, + }, + }, + AddressType: discovery.AddressTypeIPv4, + Endpoints: []discovery.Endpoint{ + readyEndpointsWithAddresses("10.128.0.2", "10.128.1.2"), + }, + } + + controller, err := newControllerWithDBSetup(libovsdbtest.TestSetup{NBData: []libovsdbtest.TestData{ + nodeLogicalSwitch(nodeA.name, initialLsGroups), + nodeLogicalRouter(nodeA.name, initialLrGroups), + + lbGroup(types.ClusterLBGroupName), + lbGroup(types.ClusterSwitchLBGroupName), + lbGroup(types.ClusterRouterLBGroupName), + }}) + g.Expect(err).ToNot(gomega.HaveOccurred()) + defer controller.close() + + controller.endpointSliceStore.Add(endPointSlice) + controller.serviceStore.Add(svc) + controller.nodeTracker.nodes = map[string]nodeInfo{nodeA.name: nodeA} + + controller.RequestFullSync(controller.nodeTracker.getZoneNodes()) + err = controller.syncService(svc.Namespace + "/" + svc.Name) + g.Expect(err).ToNot(gomega.HaveOccurred()) + + expectedDb := []libovsdbtest.TestData{ + &nbdb.LoadBalancer{ + UUID: nodeSwitchRouterLoadBalancerName(nodeA.name, svc.Namespace, svc.Name), + Name: nodeSwitchRouterLoadBalancerName(nodeA.name, svc.Namespace, svc.Name), + Options: servicesOptions(), + Protocol: &nbdb.LoadBalancerProtocolTCP, + Vips: map[string]string{ + "192.168.1.1:80": "10.128.0.2:3456,10.128.1.2:3456", + }, + ExternalIDs: serviceExternalIDs(namespacedServiceName(svc.Namespace, svc.Name)), + }, + &nbdb.LoadBalancer{ + UUID: "Service_namespace1/svc-foo_TCP_node_switch_template_IPv4_merged", + Name: "Service_namespace1/svc-foo_TCP_node_switch_template_IPv4_merged", + Options: templateServicesOptions(), + Protocol: &nbdb.LoadBalancerProtocolTCP, + Vips: map[string]string{ + "^NODEIP_IPv4_1:30123": "10.128.0.2:3456,10.128.1.2:3456", + "^NODEIP_IPv4_2:30123": "10.128.0.2:3456,10.128.1.2:3456", + "^NODEIP_IPv4_0:30123": "10.128.0.2:3456,10.128.1.2:3456", + }, + ExternalIDs: serviceExternalIDs(namespacedServiceName(svc.Namespace, svc.Name)), + }, + nodeLogicalSwitch(nodeA.name, initialLsGroups, "Service_namespace1/svc-foo_TCP_node_router+switch_node-a"), + nodeLogicalRouter(nodeA.name, initialLrGroups, "Service_namespace1/svc-foo_TCP_node_router+switch_node-a"), + lbGroup(types.ClusterLBGroupName), + lbGroup(types.ClusterSwitchLBGroupName, nodeMergedTemplateLoadBalancerName(svc.Namespace, svc.Name, v1.IPv4Protocol)), + lbGroup(types.ClusterRouterLBGroupName, nodeMergedTemplateLoadBalancerName(svc.Namespace, svc.Name, v1.IPv4Protocol)), + + &nbdb.ChassisTemplateVar{ + UUID: nodeA.chassisID, Chassis: nodeA.chassisID, + Variables: map[string]string{ + makeLBNodeIPTemplateNamePrefix(v1.IPv4Protocol) + "0": nodeA.hostAddresses[0].String(), + makeLBNodeIPTemplateNamePrefix(v1.IPv4Protocol) + "1": nodeA.hostAddresses[1].String(), + makeLBNodeIPTemplateNamePrefix(v1.IPv4Protocol) + "2": nodeA.hostAddresses[2].String(), + }, + }, + } + 
+ g.Expect(controller.nbClient).To(libovsdbtest.HaveData(expectedDb)) + +} + func nodeLogicalSwitch(nodeName string, lbGroups []string, namespacedServiceNames ...string) *nbdb.LogicalSwitch { ls := &nbdb.LogicalSwitch{ UUID: nodeSwitchName(nodeName), @@ -625,7 +739,7 @@ func serviceExternalIDs(namespacedServiceName string) map[string]string { } func nodeSwitchTemplateLoadBalancer(nodePort int32, serviceName string, serviceNamespace string) *nbdb.LoadBalancer { - nodeTemplateIP := makeTemplate(makeLBNodeIPTemplateName(v1.IPv4Protocol)) + nodeTemplateIP := makeTemplate(makeLBNodeIPTemplateNamePrefix(v1.IPv4Protocol) + "0") return &nbdb.LoadBalancer{ UUID: nodeSwitchTemplateLoadBalancerName(serviceNamespace, serviceName, v1.IPv4Protocol), Name: nodeSwitchTemplateLoadBalancerName(serviceNamespace, serviceName, v1.IPv4Protocol), @@ -639,7 +753,7 @@ func nodeSwitchTemplateLoadBalancer(nodePort int32, serviceName string, serviceN } func nodeRouterTemplateLoadBalancer(nodePort int32, serviceName string, serviceNamespace string) *nbdb.LoadBalancer { - nodeTemplateIP := makeTemplate(makeLBNodeIPTemplateName(v1.IPv4Protocol)) + nodeTemplateIP := makeTemplate(makeLBNodeIPTemplateNamePrefix(v1.IPv4Protocol) + "0") return &nbdb.LoadBalancer{ UUID: nodeRouterTemplateLoadBalancerName(serviceNamespace, serviceName, v1.IPv4Protocol), Name: nodeRouterTemplateLoadBalancerName(serviceNamespace, serviceName, v1.IPv4Protocol), @@ -657,13 +771,13 @@ func nodeIPTemplate(node *nodeInfo) *nbdb.ChassisTemplateVar { UUID: node.chassisID, Chassis: node.chassisID, Variables: map[string]string{ - makeLBNodeIPTemplateName(v1.IPv4Protocol): node.hostAddresses[0].String(), + makeLBNodeIPTemplateNamePrefix(v1.IPv4Protocol) + "0": node.hostAddresses[0].String(), }, } } func nodeMergedTemplateLoadBalancer(nodePort int32, serviceName string, serviceNamespace string, outputPort int32, endpointIPs ...string) *nbdb.LoadBalancer { - nodeTemplateIP := makeTemplate(makeLBNodeIPTemplateName(v1.IPv4Protocol)) + nodeTemplateIP := makeTemplate(makeLBNodeIPTemplateNamePrefix(v1.IPv4Protocol) + "0") return &nbdb.LoadBalancer{ UUID: nodeMergedTemplateLoadBalancerName(serviceNamespace, serviceName, v1.IPv4Protocol), Name: nodeMergedTemplateLoadBalancerName(serviceNamespace, serviceName, v1.IPv4Protocol), @@ -751,3 +865,12 @@ func deleteTestNBGlobal(nbClient libovsdbclient.Client, zone string) error { return nil } + +func readyEndpointsWithAddresses(addresses ...string) discovery.Endpoint { + return discovery.Endpoint{ + Conditions: discovery.EndpointConditions{ + Ready: utilpointer.Bool(true), + }, + Addresses: addresses, + } +} diff --git a/go-controller/pkg/ovn/controller/services/svc_template_var.go b/go-controller/pkg/ovn/controller/services/svc_template_var.go index 7c52bd365d..9ed0cab13d 100644 --- a/go-controller/pkg/ovn/controller/services/svc_template_var.go +++ b/go-controller/pkg/ovn/controller/services/svc_template_var.go @@ -2,6 +2,7 @@ package services import ( "fmt" + "net" "regexp" "strings" @@ -30,12 +31,6 @@ type Template struct { type TemplateMap map[string]*Template type ChassisTemplateVarMap map[string]*nbdb.ChassisTemplateVar -// len returns the number of chasis on which this Template variable is -// instantiated (has a value). -func (t *Template) len() int { - return len(t.Value) -} - // toReferenceString returns the textual representation of a template // reference, that is, '^'. 
func (t *Template) toReferenceString() string { @@ -179,16 +174,16 @@ func svcCreateOrUpdateTemplateVar(nbClient libovsdbclient.Client, templateVars [ return err } -// makeLBNodeIPTemplateName creates a template name for the node IP (per family) -func makeLBNodeIPTemplateName(family corev1.IPFamily) string { - return fmt.Sprintf("%s_%v", LBVipNodeTemplate, family) +// makeLBNodeIPTemplateNamePrefix creates a template name prefix for the node IP (per family) +func makeLBNodeIPTemplateNamePrefix(family corev1.IPFamily) string { + return fmt.Sprintf("%s_%v_", LBVipNodeTemplate, family) } -// isLBNodeIPTemplateName returns true if 'name' is the node IP template name -// for any IP family. +// isLBNodeIPTemplateName returns true if 'name' is a node IP template name +// for any IP family (i.e. in the form NODEIP_IPv4_X). func isLBNodeIPTemplateName(name string) bool { - return name == makeLBNodeIPTemplateName(corev1.IPv4Protocol) || - name == makeLBNodeIPTemplateName(corev1.IPv6Protocol) + return strings.HasPrefix(name, makeLBNodeIPTemplateNamePrefix(corev1.IPv4Protocol)) || + strings.HasPrefix(name, makeLBNodeIPTemplateNamePrefix(corev1.IPv6Protocol)) } // makeLBTargetTemplateName builds a load balancer target template name. @@ -215,3 +210,58 @@ func getTemplatesFromRulesTargets(rules []LBRule) TemplateMap { } return templates } + +// NodeIPTemplates maintains templates variables for many IP addresses per node, +// creating them in the form NODEIP_IPv4_0, NODEIP_IPv4_1, NODEIP_IPv4_2, ... +// if and when they are needed. +type NodeIPsTemplates struct { + ipFamily corev1.IPFamily + templates []*Template +} + +func NewNodeIPsTemplates(ipFamily corev1.IPFamily) *NodeIPsTemplates { + return &NodeIPsTemplates{ + ipFamily: ipFamily, + templates: make([]*Template, 0), + } +} + +// AddIP adds a template variable for the specified chassis and ip address. +func (n *NodeIPsTemplates) AddIP(chassisID string, ip net.IP) { + + for _, template := range n.templates { + _, ok := template.Value[chassisID] + if !ok { + template.Value[chassisID] = ip.String() + return + } + } + + // NODEIP_IPvN_XXX is missing, creating it. + newTemplate := makeTemplate( + makeLBNodeIPTemplateNamePrefix(n.ipFamily) + fmt.Sprint(len(n.templates)), + ) + + // And initialize with chassisID value + newTemplate.Value[chassisID] = ip.String() + + n.templates = append(n.templates, newTemplate) +} + +func (n *NodeIPsTemplates) AsTemplateMap() TemplateMap { + var ret TemplateMap = TemplateMap{} + + for _, t := range n.templates { + ret[t.Name] = t + } + + return ret +} + +func (n *NodeIPsTemplates) AsTemplates() []*Template { + return n.templates +} + +func (n *NodeIPsTemplates) Len() int { + return len(n.templates) +} diff --git a/go-controller/pkg/ovn/controller/services/svc_template_var_test.go b/go-controller/pkg/ovn/controller/services/svc_template_var_test.go new file mode 100644 index 0000000000..cf71f3d675 --- /dev/null +++ b/go-controller/pkg/ovn/controller/services/svc_template_var_test.go @@ -0,0 +1,79 @@ +package services + +import ( + "net" + "sort" + "testing" + + "github.com/onsi/gomega" + v1 "k8s.io/api/core/v1" +) + +func Test_NodeIPTemplates_SingleIP(t *testing.T) { + g := gomega.NewGomegaWithT(t) + + // System Under Test + sut := NewNodeIPsTemplates(v1.IPv4Protocol) + + sut.AddIP("ch1", net.ParseIP("11.11.0.1")) + sut.AddIP("ch2", net.ParseIP("11.11.0.2")) + + g.Expect(sut.Len()).To(gomega.Equal(1)) + g.Expect(sut.AsTemplates()). 
+ To(gomega.Equal( + []*Template{{ + Name: "NODEIP_IPv4_0", + Value: map[string]string{ + "ch1": "11.11.0.1", + "ch2": "11.11.0.2", + }, + }}, + )) +} + +func Test_NodeIPTemplates_DifferentIPCount(t *testing.T) { + g := gomega.NewGomegaWithT(t) + + // System Under Test + sut := NewNodeIPsTemplates(v1.IPv4Protocol) + + sut.AddIP("ch1", net.ParseIP("11.11.0.1")) + sut.AddIP("ch2", net.ParseIP("11.11.0.2")) + sut.AddIP("ch2", net.ParseIP("22.22.0.2")) + sut.AddIP("ch3", net.ParseIP("11.11.0.3")) + sut.AddIP("ch3", net.ParseIP("22.22.0.3")) + sut.AddIP("ch3", net.ParseIP("33.33.0.3")) + + g.Expect(sut.Len()).To(gomega.Equal(3)) + + templates := sut.AsTemplates() + sortTemplateSliceByName(templates) + g.Expect(templates). + To(gomega.BeEquivalentTo( + []*Template{{ + Name: "NODEIP_IPv4_0", + Value: map[string]string{ + "ch1": "11.11.0.1", + "ch2": "11.11.0.2", + "ch3": "11.11.0.3", + }, + }, { + Name: "NODEIP_IPv4_1", + Value: map[string]string{ + "ch2": "22.22.0.2", + "ch3": "22.22.0.3", + }, + }, { + Name: "NODEIP_IPv4_2", + Value: map[string]string{ + "ch3": "33.33.0.3", + }, + }}, + )) +} + +func sortTemplateSliceByName(input []*Template) { + sort.Slice(input, func(i, j int) bool { + return input[i].Name < input[j].Name + }) +} diff --git a/test/e2e/e2e.go b/test/e2e/e2e.go index dd64a77ae3..0390ad9a47 100644 --- a/test/e2e/e2e.go +++ b/test/e2e/e2e.go @@ -2273,18 +2273,26 @@ var _ = ginkgo.Describe("e2e ingress traffic validation", func() { // This test verifies a NodePort service is reachable on manually added IP addresses. ginkgo.It("for NodePort services", func() { isIPv6Cluster := IsIPv6Cluster(f.ClientSet) - serviceName := "nodeportservice" - ginkgo.By("Creating NodePort service") - svcSpec := nodePortServiceSpecFrom(serviceName, v1.IPFamilyPolicyPreferDualStack, endpointHTTPPort, endpointUDPPort, clusterHTTPPort, clusterUDPPort, endpointsSelector, v1.ServiceExternalTrafficPolicyTypeLocal) - svcSpec, err := f.ClientSet.CoreV1().Services(f.Namespace.Name).Create(context.Background(), svcSpec, metav1.CreateOptions{}) + ginkgo.By("Creating NodePort services") + + etpLocalServiceName := "etplocal-svc" + etpLocalSvc := nodePortServiceSpecFrom(etpLocalServiceName, v1.IPFamilyPolicyPreferDualStack, endpointHTTPPort, endpointUDPPort, clusterHTTPPort, clusterUDPPort, endpointsSelector, v1.ServiceExternalTrafficPolicyTypeLocal) + etpLocalSvc, err := f.ClientSet.CoreV1().Services(f.Namespace.Name).Create(context.Background(), etpLocalSvc, metav1.CreateOptions{}) + framework.ExpectNoError(err) + + etpClusterServiceName := "etpcluster-svc" + etpClusterSvc := nodePortServiceSpecFrom(etpClusterServiceName, v1.IPFamilyPolicyPreferDualStack, endpointHTTPPort, endpointUDPPort, clusterHTTPPort, clusterUDPPort, endpointsSelector, v1.ServiceExternalTrafficPolicyTypeCluster) + etpClusterSvc, err = f.ClientSet.CoreV1().Services(f.Namespace.Name).Create(context.Background(), etpClusterSvc, metav1.CreateOptions{}) framework.ExpectNoError(err) ginkgo.By("Waiting for the endpoints to pop up") - err = framework.WaitForServiceEndpointsNum(f.ClientSet, f.Namespace.Name, serviceName, len(endPoints), time.Second, wait.ForeverTestTimeout) - framework.ExpectNoError(err, "failed to validate endpoints for service %s in namespace: %s", serviceName, f.Namespace.Name) - tcpNodePort, udpNodePort := nodePortsFromService(svcSpec) + err = framework.WaitForServiceEndpointsNum(f.ClientSet, f.Namespace.Name, etpLocalServiceName, len(endPoints), time.Second, wait.ForeverTestTimeout) + framework.ExpectNoError(err, "failed to 
validate endpoints for service %s in namespace: %s", etpLocalServiceName, f.Namespace.Name) + + err = framework.WaitForServiceEndpointsNum(f.ClientSet, f.Namespace.Name, etpClusterServiceName, len(endPoints), time.Second, wait.ForeverTestTimeout) + framework.ExpectNoError(err, "failed to validate endpoints for service %s in namespace: %s", etpClusterServiceName, f.Namespace.Name) toCheckNodesAddresses := sets.NewString() for _, node := range nodes.Items { @@ -2306,24 +2314,29 @@ var _ = ginkgo.Describe("e2e ingress traffic validation", func() { } } - for _, protocol := range []string{"http", "udp"} { - toCurlPort := int32(tcpNodePort) - if protocol == "udp" { - toCurlPort = int32(udpNodePort) - } + for _, serviceSpec := range []*v1.Service{etpLocalSvc, etpClusterSvc} { + tcpNodePort, udpNodePort := nodePortsFromService(serviceSpec) - for _, address := range toCheckNodesAddresses.List() { - if !isIPv6Cluster && utilnet.IsIPv6String(address) { - continue + for _, protocol := range []string{"http", "udp"} { + toCurlPort := int32(tcpNodePort) + if protocol == "udp" { + toCurlPort = int32(udpNodePort) + } + + for _, address := range toCheckNodesAddresses.List() { + if !isIPv6Cluster && utilnet.IsIPv6String(address) { + continue + } + ginkgo.By("Hitting service " + serviceSpec.Name + " on " + address + " via " + protocol) + gomega.Eventually(func() bool { + epHostname := pokeEndpoint("", clientContainerName, protocol, address, toCurlPort, "hostname") + // Expect to receive a valid hostname + return nodesHostnames.Has(epHostname) + }, "20s", "1s").Should(gomega.BeTrue()) } - ginkgo.By("Hitting the service on " + address + " via " + protocol) - gomega.Eventually(func() bool { - epHostname := pokeEndpoint("", clientContainerName, protocol, address, toCurlPort, "hostname") - // Expect to receive a valid hostname - return nodesHostnames.Has(epHostname) - }, "20s", "1s").Should(gomega.BeTrue()) } } + }) }) }) From ae8720cd8c01db9e277d8adfd449f973dacc23c5 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Fri, 5 May 2023 16:10:19 +0200 Subject: [PATCH 41/90] Dualstack Service unit test Make `Test_ETPCluster_NodePort_Service_WithMultipleIPAddresses` use v4+v6 IP addresses. 
Signed-off-by: Andrea Panattoni --- .../services/services_controller_test.go | 101 ++++++++++++------ 1 file changed, 71 insertions(+), 30 deletions(-) diff --git a/go-controller/pkg/ovn/controller/services/services_controller_test.go b/go-controller/pkg/ovn/controller/services/services_controller_test.go index b8c7d5a1e1..9c3175055c 100644 --- a/go-controller/pkg/ovn/controller/services/services_controller_test.go +++ b/go-controller/pkg/ovn/controller/services/services_controller_test.go @@ -513,18 +513,22 @@ func TestSyncServices(t *testing.T) { func Test_ETPCluster_NodePort_Service_WithMultipleIPAddresses(t *testing.T) { g := gomega.NewGomegaWithT(t) globalconfig.IPv4Mode = true + globalconfig.IPv6Mode = true + _, cidr4, _ := net.ParseCIDR("10.128.0.0/16") + _, cidr6, _ := net.ParseCIDR("fe00:0:0:0:5555::0/64") + globalconfig.Default.ClusterSubnets = []globalconfig.CIDRNetworkEntry{{CIDR: cidr4, HostSubnetLength: 16}, {CIDR: cidr6, HostSubnetLength: 64}} + + nodeIPv4 := []net.IP{net.ParseIP("10.1.1.1"), net.ParseIP("10.2.2.2"), net.ParseIP("10.3.3.3")} + nodeIPv6 := []net.IP{net.ParseIP("fd00:0:0:0:1::1"), net.ParseIP("fd00:0:0:0:2::2")} nodeA := nodeInfo{ name: "node-a", - l3gatewayAddresses: []net.IP{net.ParseIP("10.1.1.1")}, - hostAddresses: []net.IP{ - net.ParseIP("10.1.1.1"), - net.ParseIP("10.2.2.2"), - net.ParseIP("10.3.3.3")}, - gatewayRouterName: nodeGWRouterName("node-a"), - switchName: nodeSwitchName("node-a"), - chassisID: "node-a", - zone: types.OvnDefaultZone, + l3gatewayAddresses: []net.IP{nodeIPv4[0], nodeIPv6[0]}, + hostAddresses: append(nodeIPv4, nodeIPv6...), + gatewayRouterName: nodeGWRouterName("node-a"), + switchName: nodeSwitchName("node-a"), + chassisID: "node-a", + zone: types.OvnDefaultZone, } svc := &v1.Service{ @@ -532,7 +536,8 @@ func Test_ETPCluster_NodePort_Service_WithMultipleIPAddresses(t *testing.T) { Spec: v1.ServiceSpec{ Type: v1.ServiceTypeNodePort, ClusterIP: "192.168.1.1", - ClusterIPs: []string{"192.168.1.1"}, + ClusterIPs: []string{"192.168.1.1", "fd00:0:0:0:7777::1"}, + IPFamilies: []v1.IPFamily{v1.IPv4Protocol, v1.IPv6Protocol}, Selector: map[string]string{"foo": "bar"}, ExternalTrafficPolicy: v1.ServiceExternalTrafficPolicyTypeCluster, Ports: []v1.ServicePort{{ @@ -544,24 +549,32 @@ func Test_ETPCluster_NodePort_Service_WithMultipleIPAddresses(t *testing.T) { }, } - endPointSlice := &discovery.EndpointSlice{ + endPointSliceV4 := &discovery.EndpointSlice{ ObjectMeta: metav1.ObjectMeta{ - Name: svc.Name + "ab23", + Name: svc.Name + "ipv4", Namespace: svc.Namespace, Labels: map[string]string{discovery.LabelServiceName: svc.Name}, }, - Ports: []discovery.EndpointPort{ - { - Protocol: &tcp, - Port: &outport, - }, - }, + Ports: []discovery.EndpointPort{{Protocol: &tcp, Port: &outport}}, AddressType: discovery.AddressTypeIPv4, Endpoints: []discovery.Endpoint{ readyEndpointsWithAddresses("10.128.0.2", "10.128.1.2"), }, } + endPointSliceV6 := &discovery.EndpointSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: svc.Name + "ipv6", + Namespace: svc.Namespace, + Labels: map[string]string{discovery.LabelServiceName: svc.Name}, + }, + Ports: []discovery.EndpointPort{{Protocol: &tcp, Port: &outport}}, + AddressType: discovery.AddressTypeIPv6, + Endpoints: []discovery.Endpoint{ + readyEndpointsWithAddresses("fe00:0:0:0:5555::2", "fe00:0:0:0:5555::3"), + }, + } + controller, err := newControllerWithDBSetup(libovsdbtest.TestSetup{NBData: []libovsdbtest.TestData{ nodeLogicalSwitch(nodeA.name, initialLsGroups), nodeLogicalRouter(nodeA.name, initialLrGroups), @@ 
-573,7 +586,8 @@ func Test_ETPCluster_NodePort_Service_WithMultipleIPAddresses(t *testing.T) { g.Expect(err).ToNot(gomega.HaveOccurred()) defer controller.close() - controller.endpointSliceStore.Add(endPointSlice) + controller.endpointSliceStore.Add(endPointSliceV4) + controller.endpointSliceStore.Add(endPointSliceV6) controller.serviceStore.Add(svc) controller.nodeTracker.nodes = map[string]nodeInfo{nodeA.name: nodeA} @@ -583,12 +597,13 @@ func Test_ETPCluster_NodePort_Service_WithMultipleIPAddresses(t *testing.T) { expectedDb := []libovsdbtest.TestData{ &nbdb.LoadBalancer{ - UUID: nodeSwitchRouterLoadBalancerName(nodeA.name, svc.Namespace, svc.Name), - Name: nodeSwitchRouterLoadBalancerName(nodeA.name, svc.Namespace, svc.Name), + UUID: loadBalancerClusterWideTCPServiceName(svc.Namespace, svc.Name), + Name: loadBalancerClusterWideTCPServiceName(svc.Namespace, svc.Name), Options: servicesOptions(), Protocol: &nbdb.LoadBalancerProtocolTCP, Vips: map[string]string{ - "192.168.1.1:80": "10.128.0.2:3456,10.128.1.2:3456", + "192.168.1.1:80": "10.128.0.2:3456,10.128.1.2:3456", + "[fd00::7777:0:0:1]:80": "[fe00::5555:0:0:2]:3456,[fe00::5555:0:0:3]:3456", }, ExternalIDs: serviceExternalIDs(namespacedServiceName(svc.Namespace, svc.Name)), }, @@ -604,18 +619,36 @@ func Test_ETPCluster_NodePort_Service_WithMultipleIPAddresses(t *testing.T) { }, ExternalIDs: serviceExternalIDs(namespacedServiceName(svc.Namespace, svc.Name)), }, - nodeLogicalSwitch(nodeA.name, initialLsGroups, "Service_namespace1/svc-foo_TCP_node_router+switch_node-a"), - nodeLogicalRouter(nodeA.name, initialLrGroups, "Service_namespace1/svc-foo_TCP_node_router+switch_node-a"), - lbGroup(types.ClusterLBGroupName), - lbGroup(types.ClusterSwitchLBGroupName, nodeMergedTemplateLoadBalancerName(svc.Namespace, svc.Name, v1.IPv4Protocol)), - lbGroup(types.ClusterRouterLBGroupName, nodeMergedTemplateLoadBalancerName(svc.Namespace, svc.Name, v1.IPv4Protocol)), + &nbdb.LoadBalancer{ + UUID: "Service_namespace1/svc-foo_TCP_node_switch_template_IPv6_merged", + Name: "Service_namespace1/svc-foo_TCP_node_switch_template_IPv6_merged", + Options: templateServicesOptionsV6(), + Protocol: &nbdb.LoadBalancerProtocolTCP, + Vips: map[string]string{ + "^NODEIP_IPv6_1:30123": "[fe00::5555:0:0:2]:3456,[fe00::5555:0:0:3]:3456", + "^NODEIP_IPv6_0:30123": "[fe00::5555:0:0:2]:3456,[fe00::5555:0:0:3]:3456", + }, + ExternalIDs: serviceExternalIDs(namespacedServiceName(svc.Namespace, svc.Name)), + }, + nodeLogicalSwitch(nodeA.name, initialLsGroups), + nodeLogicalRouter(nodeA.name, initialLrGroups), + lbGroup(types.ClusterLBGroupName, loadBalancerClusterWideTCPServiceName(svc.Namespace, svc.Name)), + lbGroup(types.ClusterSwitchLBGroupName, + "Service_namespace1/svc-foo_TCP_node_switch_template_IPv4_merged", + "Service_namespace1/svc-foo_TCP_node_switch_template_IPv6_merged"), + lbGroup(types.ClusterRouterLBGroupName, + "Service_namespace1/svc-foo_TCP_node_switch_template_IPv4_merged", + "Service_namespace1/svc-foo_TCP_node_switch_template_IPv6_merged"), &nbdb.ChassisTemplateVar{ UUID: nodeA.chassisID, Chassis: nodeA.chassisID, Variables: map[string]string{ - makeLBNodeIPTemplateNamePrefix(v1.IPv4Protocol) + "0": nodeA.hostAddresses[0].String(), - makeLBNodeIPTemplateNamePrefix(v1.IPv4Protocol) + "1": nodeA.hostAddresses[1].String(), - makeLBNodeIPTemplateNamePrefix(v1.IPv4Protocol) + "2": nodeA.hostAddresses[2].String(), + makeLBNodeIPTemplateNamePrefix(v1.IPv4Protocol) + "0": nodeIPv4[0].String(), + makeLBNodeIPTemplateNamePrefix(v1.IPv4Protocol) + "1": 
nodeIPv4[1].String(), + makeLBNodeIPTemplateNamePrefix(v1.IPv4Protocol) + "2": nodeIPv4[2].String(), + + makeLBNodeIPTemplateNamePrefix(v1.IPv6Protocol) + "0": nodeIPv6[0].String(), + makeLBNodeIPTemplateNamePrefix(v1.IPv6Protocol) + "1": nodeIPv6[1].String(), }, }, } @@ -725,6 +758,14 @@ func templateServicesOptions() map[string]string { return opts } +func templateServicesOptionsV6() map[string]string { + // Template LBs need "options:template=true" and "options:address-family" set. + opts := servicesOptions() + opts["template"] = "true" + opts["address-family"] = "ipv6" + return opts +} + func tcpGatewayRouterExternalIDs() map[string]string { return map[string]string{ "TCP_lb_gateway_router": "", From b3a3a398b1118bc505d066a2e03e553dcb89a1c9 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Fri, 5 May 2023 17:13:15 +0200 Subject: [PATCH 42/90] e2e: Move NodePort test to Services suite The test about NodePort services that are reachable on every host IP address should reside in the Service suite. Signed-off-by: Andrea Panattoni --- test/e2e/e2e.go | 69 ----------------------- test/e2e/service.go | 134 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 134 insertions(+), 69 deletions(-) diff --git a/test/e2e/e2e.go b/test/e2e/e2e.go index 0390ad9a47..b0aec93999 100644 --- a/test/e2e/e2e.go +++ b/test/e2e/e2e.go @@ -2269,75 +2269,6 @@ var _ = ginkgo.Describe("e2e ingress traffic validation", func() { } } }) - - // This test verifies a NodePort service is reachable on manually added IP addresses. - ginkgo.It("for NodePort services", func() { - isIPv6Cluster := IsIPv6Cluster(f.ClientSet) - - ginkgo.By("Creating NodePort services") - - etpLocalServiceName := "etplocal-svc" - etpLocalSvc := nodePortServiceSpecFrom(etpLocalServiceName, v1.IPFamilyPolicyPreferDualStack, endpointHTTPPort, endpointUDPPort, clusterHTTPPort, clusterUDPPort, endpointsSelector, v1.ServiceExternalTrafficPolicyTypeLocal) - etpLocalSvc, err := f.ClientSet.CoreV1().Services(f.Namespace.Name).Create(context.Background(), etpLocalSvc, metav1.CreateOptions{}) - framework.ExpectNoError(err) - - etpClusterServiceName := "etpcluster-svc" - etpClusterSvc := nodePortServiceSpecFrom(etpClusterServiceName, v1.IPFamilyPolicyPreferDualStack, endpointHTTPPort, endpointUDPPort, clusterHTTPPort, clusterUDPPort, endpointsSelector, v1.ServiceExternalTrafficPolicyTypeCluster) - etpClusterSvc, err = f.ClientSet.CoreV1().Services(f.Namespace.Name).Create(context.Background(), etpClusterSvc, metav1.CreateOptions{}) - framework.ExpectNoError(err) - - ginkgo.By("Waiting for the endpoints to pop up") - - err = framework.WaitForServiceEndpointsNum(f.ClientSet, f.Namespace.Name, etpLocalServiceName, len(endPoints), time.Second, wait.ForeverTestTimeout) - framework.ExpectNoError(err, "failed to validate endpoints for service %s in namespace: %s", etpLocalServiceName, f.Namespace.Name) - - err = framework.WaitForServiceEndpointsNum(f.ClientSet, f.Namespace.Name, etpClusterServiceName, len(endPoints), time.Second, wait.ForeverTestTimeout) - framework.ExpectNoError(err, "failed to validate endpoints for service %s in namespace: %s", etpClusterServiceName, f.Namespace.Name) - - toCheckNodesAddresses := sets.NewString() - for _, node := range nodes.Items { - - addrAnnotation, ok := node.Annotations["k8s.ovn.org/host-addresses"] - gomega.Expect(ok).To(gomega.BeTrue()) - - var addrs []string - err := json.Unmarshal([]byte(addrAnnotation), &addrs) - framework.ExpectNoError(err, "failed to parse node[%s] host-address annotation[%s]", 
node.Name, addrAnnotation) - - toCheckNodesAddresses.Insert(addrs...) - } - - // Ensure newly added IP address are in the host-addresses annotation - for _, newAddress := range newNodeAddresses { - if !toCheckNodesAddresses.Has(newAddress) { - toCheckNodesAddresses.Insert(newAddress) - } - } - - for _, serviceSpec := range []*v1.Service{etpLocalSvc, etpClusterSvc} { - tcpNodePort, udpNodePort := nodePortsFromService(serviceSpec) - - for _, protocol := range []string{"http", "udp"} { - toCurlPort := int32(tcpNodePort) - if protocol == "udp" { - toCurlPort = int32(udpNodePort) - } - - for _, address := range toCheckNodesAddresses.List() { - if !isIPv6Cluster && utilnet.IsIPv6String(address) { - continue - } - ginkgo.By("Hitting service " + serviceSpec.Name + " on " + address + " via " + protocol) - gomega.Eventually(func() bool { - epHostname := pokeEndpoint("", clientContainerName, protocol, address, toCurlPort, "hostname") - // Expect to receive a valid hostname - return nodesHostnames.Has(epHostname) - }, "20s", "1s").Should(gomega.BeTrue()) - } - } - } - - }) }) }) diff --git a/test/e2e/service.go b/test/e2e/service.go index 8f9e8822a4..7181c6ed7a 100644 --- a/test/e2e/service.go +++ b/test/e2e/service.go @@ -2,12 +2,14 @@ package e2e import ( "context" + "encoding/json" "fmt" "io/ioutil" "math/rand" "net" "os" "regexp" + "strconv" "strings" "time" @@ -16,6 +18,7 @@ import ( v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" clientset "k8s.io/client-go/kubernetes" "k8s.io/kubernetes/test/e2e/framework" @@ -522,6 +525,137 @@ var _ = ginkgo.Describe("Services", func() { }) framework.ExpectNoError(err) }) + + ginkgo.It("of type NodePort should listen on each host addresses", func() { + const ( + endpointHTTPPort = 80 + endpointUDPPort = 90 + clusterHTTPPort = 81 + clusterUDPPort = 91 + clientContainerName = "npclient" + ) + + endPoints := make([]*v1.Pod, 0) + endpointsSelector := map[string]string{"servicebackend": "true"} + nodesHostnames := sets.NewString() + + nodes, err := e2enode.GetBoundedReadySchedulableNodes(f.ClientSet, 3) + framework.ExpectNoError(err) + + if len(nodes.Items) < 3 { + framework.Failf( + "Test requires >= 3 Ready nodes, but there are only %v nodes", + len(nodes.Items)) + } + + ginkgo.By("Creating the endpoints pod, one for each worker") + for _, node := range nodes.Items { + args := []string{ + "netexec", + fmt.Sprintf("--http-port=%d", endpointHTTPPort), + fmt.Sprintf("--udp-port=%d", endpointUDPPort), + } + pod, err := createPod(f, node.Name+"-ep", node.Name, f.Namespace.Name, []string{}, + endpointsSelector, func(p *v1.Pod) { + p.Spec.Containers[0].Args = args + }) + framework.ExpectNoError(err) + + endPoints = append(endPoints, pod) + nodesHostnames.Insert(pod.Name) + } + + ginkgo.By("Creating an external container to send the traffic from") + createClusterExternalContainer(clientContainerName, agnhostImage, + []string{"--network", "kind", "-P"}, + []string{"netexec", "--http-port=80"}) + + // If `kindexgw` exists, connect client container to it + runCommand(containerRuntime, "network", "connect", "kindexgw", clientContainerName) + + ginkgo.By("Adding ip addresses to each node") + // add new secondary IP from node subnet to all nodes, if the cluster is v6 add an ipv6 address + toCurlAddresses := sets.NewString() + for i, node := range nodes.Items { + + addrAnnotation, ok := node.Annotations["k8s.ovn.org/host-addresses"] + 
gomega.Expect(ok).To(gomega.BeTrue()) + + var addrs []string + err := json.Unmarshal([]byte(addrAnnotation), &addrs) + framework.ExpectNoError(err, "failed to parse node[%s] host-address annotation[%s]", node.Name, addrAnnotation) + + toCurlAddresses.Insert(addrs...) + + var newIP string + if utilnet.IsIPv6String(e2enode.GetAddresses(&node, v1.NodeInternalIP)[0]) { + newIP = "fc00:f853:ccd:e794::" + strconv.Itoa(i) + } else { + newIP = "172.18.1." + strconv.Itoa(i+1) + } + // manually add the a secondary IP to each node + _, err = runCommand(containerRuntime, "exec", node.Name, "ip", "addr", "add", newIP, "dev", "breth0") + if err != nil { + framework.Failf("failed to add new Addresses to node %s: %v", node.Name, err) + } + + nodeName := node.Name + defer func() { + runCommand(containerRuntime, "exec", nodeName, "ip", "addr", "delete", newIP+"/32", "dev", "breth0") + framework.ExpectNoError(err, "failed to remove ip address %s from node %s", newIP, nodeName) + }() + + toCurlAddresses.Insert(newIP) + } + + defer deleteClusterExternalContainer(clientContainerName) + + isIPv6Cluster := IsIPv6Cluster(f.ClientSet) + + ginkgo.By("Creating NodePort services") + + etpLocalServiceName := "etp-local-svc" + etpLocalSvc := nodePortServiceSpecFrom(etpLocalServiceName, v1.IPFamilyPolicyPreferDualStack, endpointHTTPPort, endpointUDPPort, clusterHTTPPort, clusterUDPPort, endpointsSelector, v1.ServiceExternalTrafficPolicyTypeLocal) + etpLocalSvc, err = f.ClientSet.CoreV1().Services(f.Namespace.Name).Create(context.Background(), etpLocalSvc, metav1.CreateOptions{}) + framework.ExpectNoError(err) + + etpClusterServiceName := "etp-cluster-svc" + etpClusterSvc := nodePortServiceSpecFrom(etpClusterServiceName, v1.IPFamilyPolicyPreferDualStack, endpointHTTPPort, endpointUDPPort, clusterHTTPPort, clusterUDPPort, endpointsSelector, v1.ServiceExternalTrafficPolicyTypeCluster) + etpClusterSvc, err = f.ClientSet.CoreV1().Services(f.Namespace.Name).Create(context.Background(), etpClusterSvc, metav1.CreateOptions{}) + framework.ExpectNoError(err) + + ginkgo.By("Waiting for the endpoints to pop up") + + err = framework.WaitForServiceEndpointsNum(f.ClientSet, f.Namespace.Name, etpLocalServiceName, len(endPoints), time.Second, wait.ForeverTestTimeout) + framework.ExpectNoError(err, "failed to validate endpoints for service %s in namespace: %s", etpLocalServiceName, f.Namespace.Name) + + err = framework.WaitForServiceEndpointsNum(f.ClientSet, f.Namespace.Name, etpClusterServiceName, len(endPoints), time.Second, wait.ForeverTestTimeout) + framework.ExpectNoError(err, "failed to validate endpoints for service %s in namespace: %s", etpClusterServiceName, f.Namespace.Name) + + for _, serviceSpec := range []*v1.Service{etpLocalSvc, etpClusterSvc} { + tcpNodePort, udpNodePort := nodePortsFromService(serviceSpec) + + for _, protocol := range []string{"http", "udp"} { + toCurlPort := int32(tcpNodePort) + if protocol == "udp" { + toCurlPort = int32(udpNodePort) + } + + for _, address := range toCurlAddresses.List() { + if !isIPv6Cluster && utilnet.IsIPv6String(address) { + continue + } + + ginkgo.By("Hitting service " + serviceSpec.Name + " on " + address + " via " + protocol) + gomega.Eventually(func() bool { + epHostname := pokeEndpoint("", clientContainerName, protocol, address, toCurlPort, "hostname") + // Expect to receive a valid hostname + return nodesHostnames.Has(epHostname) + }, "20s", "1s").Should(gomega.BeTrue()) + } + } + } + }) }) // This test ensures that - when a pod that's a backend for a service curls the From 
1325e27cd81e547b8889db35c2d987b35490c197 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 11 May 2023 20:49:57 +0000 Subject: [PATCH 43/90] Bump github.com/docker/distribution in /test/e2e Bumps [github.com/docker/distribution](https://github.com/docker/distribution) from 2.8.1+incompatible to 2.8.2+incompatible. - [Release notes](https://github.com/docker/distribution/releases) - [Commits](https://github.com/docker/distribution/compare/v2.8.1...v2.8.2) --- updated-dependencies: - dependency-name: github.com/docker/distribution dependency-type: indirect ... Signed-off-by: dependabot[bot] --- test/e2e/go.mod | 2 +- test/e2e/go.sum | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/test/e2e/go.mod b/test/e2e/go.mod index 477e4154b5..1067bcebff 100644 --- a/test/e2e/go.mod +++ b/test/e2e/go.mod @@ -54,7 +54,7 @@ require ( github.com/blang/semver/v4 v4.0.0 // indirect github.com/cespare/xxhash/v2 v2.1.2 // indirect github.com/davecgh/go-spew v1.1.1 // indirect - github.com/docker/distribution v2.8.1+incompatible // indirect + github.com/docker/distribution v2.8.2+incompatible // indirect github.com/emicklei/go-restful/v3 v3.9.0 // indirect github.com/evanphx/json-patch v4.12.0+incompatible // indirect github.com/felixge/httpsnoop v1.0.1 // indirect diff --git a/test/e2e/go.sum b/test/e2e/go.sum index aa5e9b1a0a..57723b5779 100644 --- a/test/e2e/go.sum +++ b/test/e2e/go.sum @@ -161,8 +161,9 @@ github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZm github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= github.com/dnaeon/go-vcr v1.0.1/go.mod h1:aBB1+wY4s93YsC3HHjMBMrwTj2R9FHDzUr9KyGc8n1E= github.com/docker/distribution v2.8.0+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= -github.com/docker/distribution v2.8.1+incompatible h1:Q50tZOPR6T/hjNsyc9g8/syEs6bk8XXApsHjKukMl68= github.com/docker/distribution v2.8.1+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= +github.com/docker/distribution v2.8.2+incompatible h1:T3de5rq0dB1j30rp0sA2rER+m322EBzniBPB6ZIzuh8= +github.com/docker/distribution v2.8.2+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= github.com/docker/docker v20.10.12+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec= github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= From bb4e569e8b29aa3de0a2aec28e83337a06b6a39a Mon Sep 17 00:00:00 2001 From: Martin Kennelly Date: Fri, 12 May 2023 12:20:54 +0100 Subject: [PATCH 44/90] Emit node events only on retry failure Node objects are configured by distributed software components and, prior to this patch, we were emitting numerous Kubernetes warning events when in fact everything was proceeding normally. Only emit warning events when we fail to configure a node, i.e. after 15 retry attempts (~7 minutes currently). We continue logging every node add/update/delete failure to the logs.
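For reference, a minimal, self-contained Go sketch of the intended behaviour follows. The names maxFailedAttempts, reconcileNode and recordWarningEvent are illustrative placeholders, not the actual ovn-kubernetes identifiers; the real logic lives in the retry framework in go-controller/pkg/retry/obj_retry.go.

package main

import (
	"errors"
	"fmt"
	"log"
)

// maxFailedAttempts mirrors the idea of the retry framework's retry budget
// (15 attempts, roughly 7 minutes); the value here is illustrative only.
const maxFailedAttempts = 15

// reconcileNode runs one reconcile attempt and decides whether the failure
// becomes a Kubernetes warning event or only a log line.
func reconcileNode(node string, attempt int, reconcile func(string) error,
	recordWarningEvent func(node, reason string, err error)) error {
	err := reconcile(node)
	if err == nil {
		return nil
	}
	// Every failure is still logged so transient errors stay visible.
	log.Printf("failed to reconcile node %s (attempt %d): %v", node, attempt, err)
	// Only emit a warning event once all retries are exhausted.
	if attempt >= maxFailedAttempts {
		recordWarningEvent(node, "RetryFailed",
			fmt.Errorf("failed to reconcile after %d attempts: %w", maxFailedAttempts, err))
	}
	return err
}

func main() {
	failing := func(string) error { return errors.New("gateway not ready") }
	record := func(node, reason string, err error) {
		fmt.Printf("event %s on node %s: %v\n", reason, node, err)
	}
	for attempt := 1; attempt <= maxFailedAttempts; attempt++ {
		_ = reconcileNode("node1", attempt, failing, record)
	}
}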
Signed-off-by: Martin Kennelly --- go-controller/pkg/ovn/default_network_controller.go | 4 ++++ go-controller/pkg/ovn/master.go | 13 ++----------- go-controller/pkg/ovn/master_test.go | 12 ------------ go-controller/pkg/ovn/ovn.go | 10 ++++++++++ go-controller/pkg/retry/obj_retry.go | 4 ++++ 5 files changed, 20 insertions(+), 23 deletions(-) diff --git a/go-controller/pkg/ovn/default_network_controller.go b/go-controller/pkg/ovn/default_network_controller.go index 52bca289b9..4533c1368b 100644 --- a/go-controller/pkg/ovn/default_network_controller.go +++ b/go-controller/pkg/ovn/default_network_controller.go @@ -668,6 +668,10 @@ func (h *defaultNetworkControllerEventHandler) RecordErrorEvent(obj interface{}, pod := obj.(*kapi.Pod) klog.V(5).Infof("Recording error event on pod %s/%s", pod.Namespace, pod.Name) h.oc.recordPodEvent(reason, err, pod) + case factory.NodeType: + node := obj.(*kapi.Node) + klog.V(5).Infof("Recording error event for node %s", node.Name) + h.oc.recordNodeEvent(reason, err, node) } } diff --git a/go-controller/pkg/ovn/master.go b/go-controller/pkg/ovn/master.go index 72180a47a1..7744b654bc 100644 --- a/go-controller/pkg/ovn/master.go +++ b/go-controller/pkg/ovn/master.go @@ -758,9 +758,7 @@ func (oc *DefaultNetworkController) addUpdateLocalNodeEvent(node *kapi.Node, nSy if nSyncs.syncZoneIC { oc.syncZoneICFailed.Store(node.Name, true) } - err = fmt.Errorf("nodeAdd: error adding node %q: %w", node.Name, err) - oc.recordNodeErrorEvent(node, err) - return err + return fmt.Errorf("nodeAdd: error adding node %q: %w", node.Name, err) } oc.addNodeFailed.Delete(node.Name) } @@ -856,13 +854,7 @@ func (oc *DefaultNetworkController) addUpdateLocalNodeEvent(node *kapi.Node, nSy } } } - - err = kerrors.NewAggregate(errs) - if err != nil { - oc.recordNodeErrorEvent(node, err) - } - - return err + return kerrors.NewAggregate(errs) } func (oc *DefaultNetworkController) addUpdateRemoteNodeEvent(node *kapi.Node, syncZoneIC bool) error { @@ -897,7 +889,6 @@ func (oc *DefaultNetworkController) addUpdateRemoteNodeEvent(node *kapi.Node, sy } } } - return err } diff --git a/go-controller/pkg/ovn/master_test.go b/go-controller/pkg/ovn/master_test.go index 32fae892db..6a18f84fcd 100644 --- a/go-controller/pkg/ovn/master_test.go +++ b/go-controller/pkg/ovn/master_test.go @@ -1342,18 +1342,6 @@ var _ = ginkgo.Describe("Default network controller operations", func() { gomega.BeNil(), // oldObj should be nil gomega.Not(gomega.BeNil()), // newObj should not be nil ) - - // check that a node event was posted - gomega.Eventually(func() []string { - eventsLock.Lock() - defer eventsLock.Unlock() - eventsCopy := make([]string, 0, len(events)) - for _, e := range events { - eventsCopy = append(eventsCopy, e) - } - return eventsCopy - }, 10).Should(gomega.ContainElement(gomega.ContainSubstring("Warning ErrorReconcilingNode error creating gateway for node node1"))) - connCtx, cancel := context.WithTimeout(context.Background(), types.OVSDBTimeout) defer cancel() ginkgo.By("bring up NBDB") diff --git a/go-controller/pkg/ovn/ovn.go b/go-controller/pkg/ovn/ovn.go index 7acde5b88a..bbd1f47d87 100644 --- a/go-controller/pkg/ovn/ovn.go +++ b/go-controller/pkg/ovn/ovn.go @@ -109,6 +109,16 @@ func (oc *DefaultNetworkController) recordPodEvent(reason string, addErr error, } } +func (oc *DefaultNetworkController) recordNodeEvent(reason string, addErr error, node *kapi.Node) { + nodeRef, err := ref.GetReference(scheme.Scheme, node) + if err != nil { + klog.Errorf("Couldn't get a reference to node %s to post an event: 
'%v'", node.Name, err) + } else { + klog.V(5).Infof("Posting a %s event for node %s", kapi.EventTypeWarning, node.Name) + oc.recorder.Eventf(nodeRef, kapi.EventTypeWarning, reason, addErr.Error()) + } +} + func exGatewayAnnotationsChanged(oldPod, newPod *kapi.Pod) bool { return oldPod.Annotations[util.RoutingNamespaceAnnotation] != newPod.Annotations[util.RoutingNamespaceAnnotation] || oldPod.Annotations[util.RoutingNetworkAnnotation] != newPod.Annotations[util.RoutingNetworkAnnotation] || diff --git a/go-controller/pkg/retry/obj_retry.go b/go-controller/pkg/retry/obj_retry.go index 2120297829..6c7120335c 100644 --- a/go-controller/pkg/retry/obj_retry.go +++ b/go-controller/pkg/retry/obj_retry.go @@ -233,6 +233,10 @@ func (r *RetryFramework) resourceRetry(objKey string, now time.Time) { r.ResourceHandler.ObjType, objKey) r.DeleteRetryObj(key) metrics.MetricResourceRetryFailuresCount.Inc() + if entry.newObj != nil { + r.ResourceHandler.RecordErrorEvent(entry.newObj, "RetryFailed", + fmt.Errorf("failed to reconcile and retried %d times for object: %v", MaxFailedAttempts, entry.newObj)) + } return } forceRetry := false From f64a156dbd108e9c3f0be0efdd55752b194c8a8e Mon Sep 17 00:00:00 2001 From: Balazs Nemeth Date: Fri, 21 Apr 2023 09:52:19 +0200 Subject: [PATCH 45/90] Fix handling alternative netdev names netlink.LinkByName() also returns links if an altname is provided. This means that netlink.LinkByName(x) returns a correct link even if the name is different from x. Signed-off-by: Balazs Nemeth --- go-controller/pkg/node/management-port-dpu.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/go-controller/pkg/node/management-port-dpu.go b/go-controller/pkg/node/management-port-dpu.go index f623953eaa..4bb871a565 100644 --- a/go-controller/pkg/node/management-port-dpu.go +++ b/go-controller/pkg/node/management-port-dpu.go @@ -55,7 +55,7 @@ func (mp *managementPortRepresentor) Create(_ *routeManager, nodeAnnotator kube. if err != nil { return nil, fmt.Errorf("failed to get link device for %s. %v", mp.repName, err) } - } else if mp.repName != k8sMgmtIntfName { + } else if link.Attrs().Name != k8sMgmtIntfName { if err := syncMgmtPortInterface(mp.hostSubnets, k8sMgmtIntfName, false); err != nil { return nil, fmt.Errorf("failed to check existing management port: %v", err) } @@ -193,7 +193,7 @@ func (mp *managementPortNetdev) Create(routeManager *routeManager, nodeAnnotator if err != nil { return nil, fmt.Errorf("failed to get link device for %s. %v", mp.netdevName, err) } - } else if mp.netdevName != types.K8sMgmtIntfName { + } else if link.Attrs().Name != types.K8sMgmtIntfName { err = syncMgmtPortInterface(mp.hostSubnets, types.K8sMgmtIntfName, false) if err != nil { return nil, fmt.Errorf("failed to sync management port: %v", err) From e55d3fe67f9f18c46a0fb6ac6de0d20893dffd69 Mon Sep 17 00:00:00 2001 From: Balazs Nemeth Date: Fri, 14 Apr 2023 10:23:24 +0200 Subject: [PATCH 46/90] Expose PfID and VfID in DPU host mode and read that in DPU mode Currently, the vf rep needs to be specified manually on the DPU side, but given a configuration on the host side, there is only one correct configuration on the DPU side. Instead of relying on the user to provide the right config on both sides, expose the config on the host side through an annotation and read that annotation from the DPU side. 
Signed-off-by: Balazs Nemeth --- go-controller/pkg/config/config.go | 13 +- go-controller/pkg/config/config_test.go | 17 +-- .../node/default_node_network_controller.go | 129 +++++++++++++++--- .../pkg/node/gateway_init_linux_test.go | 1 - go-controller/pkg/node/management-port-dpu.go | 48 ++----- go-controller/pkg/node/management-port.go | 10 +- .../pkg/node/management-port_dpu_test.go | 13 -- .../pkg/node/management-port_linux_test.go | 15 +- .../pkg/node/management-port_test.go | 18 +-- go-controller/pkg/util/node_annotations.go | 36 +++++ 10 files changed, 202 insertions(+), 98 deletions(-) diff --git a/go-controller/pkg/config/config.go b/go-controller/pkg/config/config.go index 654df1f6ca..95ca52efa0 100644 --- a/go-controller/pkg/config/config.go +++ b/go-controller/pkg/config/config.go @@ -2454,11 +2454,14 @@ func buildOvnKubeNodeConfig(ctx *cli.Context, cli, file *config) error { OvnKubeNode.MgmtPortNetdev, OvnKubeNode.MgmtPortDPResourceName) } - // when DPU is used, management port is backed by a VF. get management port VF information - if OvnKubeNode.Mode == types.NodeModeDPU || OvnKubeNode.Mode == types.NodeModeDPUHost { - if OvnKubeNode.MgmtPortNetdev == "" && OvnKubeNode.MgmtPortDPResourceName == "" { - return fmt.Errorf("ovnkube-node-mgmt-port-netdev or ovnkube-node-mgmt-port-dp-resource-name must be provided") - } + // when DPU is used, management port is always backed by a representor. On the + // host side, it needs to be provided through --ovnkube-node-mgmt-port-netdev. + // On the DPU, it is derrived from the annotation exposed on the host side. + if OvnKubeNode.Mode == types.NodeModeDPU && !(OvnKubeNode.MgmtPortNetdev == "" && OvnKubeNode.MgmtPortDPResourceName == "") { + return fmt.Errorf("ovnkube-node-mgmt-port-netdev or ovnkube-node-mgmt-port-dp-resource-name must not be provided") + } + if OvnKubeNode.Mode == types.NodeModeDPUHost && OvnKubeNode.MgmtPortNetdev == "" && OvnKubeNode.MgmtPortDPResourceName == "" { + return fmt.Errorf("ovnkube-node-mgmt-port-netdev or ovnkube-node-mgmt-port-dp-resource-name must be provided") } return nil } diff --git a/go-controller/pkg/config/config_test.go b/go-controller/pkg/config/config_test.go index e399cef12c..46db2c74ec 100644 --- a/go-controller/pkg/config/config_test.go +++ b/go-controller/pkg/config/config_test.go @@ -1691,29 +1691,25 @@ foo=bar } file := config{ OvnKubeNode: OvnKubeNodeConfig{ - Mode: types.NodeModeDPU, - MgmtPortNetdev: "enp1s0f0v0", - MgmtPortDPResourceName: "openshift.io/mgmtvf", + Mode: types.NodeModeDPU, }, } err := buildOvnKubeNodeConfig(nil, &cliConfig, &file) gomega.Expect(err).ToNot(gomega.HaveOccurred()) gomega.Expect(OvnKubeNode.Mode).To(gomega.Equal(types.NodeModeDPU)) - gomega.Expect(OvnKubeNode.MgmtPortNetdev).To(gomega.Equal("enp1s0f0v0")) - gomega.Expect(OvnKubeNode.MgmtPortDPResourceName).To(gomega.Equal("openshift.io/mgmtvf")) }) It("Overrides value from CLI", func() { cliConfig := config{ OvnKubeNode: OvnKubeNodeConfig{ - Mode: types.NodeModeDPU, + Mode: types.NodeModeDPUHost, MgmtPortNetdev: "enp1s0f0v0", MgmtPortDPResourceName: "openshift.io/mgmtvf", }, } err := buildOvnKubeNodeConfig(nil, &cliConfig, &config{}) gomega.Expect(err).ToNot(gomega.HaveOccurred()) - gomega.Expect(OvnKubeNode.Mode).To(gomega.Equal(types.NodeModeDPU)) + gomega.Expect(OvnKubeNode.Mode).To(gomega.Equal(types.NodeModeDPUHost)) gomega.Expect(OvnKubeNode.MgmtPortNetdev).To(gomega.Equal("enp1s0f0v0")) gomega.Expect(OvnKubeNode.MgmtPortDPResourceName).To(gomega.Equal("openshift.io/mgmtvf")) }) @@ -1742,15 +1738,16 
@@ foo=bar "hybrid overlay is not supported with ovnkube-node mode")) }) - It("Fails if management port is not provided and ovnkube node mode is dpu", func() { + It("Fails if management port is provided and ovnkube node mode is dpu", func() { cliConfig := config{ OvnKubeNode: OvnKubeNodeConfig{ - Mode: types.NodeModeDPU, + Mode: types.NodeModeDPU, + MgmtPortNetdev: "enp1s0f0v0", }, } err := buildOvnKubeNodeConfig(nil, &cliConfig, &config{}) gomega.Expect(err).To(gomega.HaveOccurred()) - gomega.Expect(err.Error()).To(gomega.ContainSubstring("ovnkube-node-mgmt-port-netdev or ovnkube-node-mgmt-port-dp-resource-name must be provided")) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("ovnkube-node-mgmt-port-netdev or ovnkube-node-mgmt-port-dp-resource-name must not be provided")) }) It("Fails if management port is not provided and ovnkube node mode is dpu-host", func() { diff --git a/go-controller/pkg/node/default_node_network_controller.go b/go-controller/pkg/node/default_node_network_controller.go index 1e87d34dcc..f9dd1b0fd9 100644 --- a/go-controller/pkg/node/default_node_network_controller.go +++ b/go-controller/pkg/node/default_node_network_controller.go @@ -456,28 +456,123 @@ func handleNetdevResources(resourceName string) (string, error) { return netdevice, nil } -func createNodeManagementPorts(name string, nodeAnnotator kube.Annotator, waiter *startupWaiter, - subnets []*net.IPNet, routeManager *routeManager) ([]managementPortEntry, *managementPortConfig, error) { - // If netdevice name is not provided in the full mode then management port backed by OVS internal port. - // If it is provided then it is backed by VF or SF and need to determine its representor name to plug - // into OVS integrational bridge - if config.OvnKubeNode.Mode == types.NodeModeFull && config.OvnKubeNode.MgmtPortNetdev != "" { - deviceID, err := util.GetDeviceIDFromNetdevice(config.OvnKubeNode.MgmtPortNetdev) +func exportManagementPortAnnotation(netdevName string, nodeAnnotator kube.Annotator) error { + klog.Infof("Exporting management port annotation for netdev '%v'", netdevName) + deviceID, err := util.GetDeviceIDFromNetdevice(netdevName) + if err != nil { + return err + } + vfindex, err := util.GetSriovnetOps().GetVfIndexByPciAddress(deviceID) + if err != nil { + return err + } + pfindex, err := util.GetSriovnetOps().GetPfIndexByVfPciAddress(deviceID) + if err != nil { + return err + } + + return util.SetNodeManagementPortAnnotation(nodeAnnotator, pfindex, vfindex) +} + +func importManagementPortAnnotation(node *kapi.Node) (string, error) { + klog.Infof("Import management port annotation on node '%v'", node.Name) + pfId, vfId, err := util.ParseNodeManagementPortAnnotation(node) + + if err != nil { + return "", err + } + klog.Infof("Imported pfId '%v' and FuncId '%v' for node '%v'", pfId, vfId, node.Name) + + return util.GetSriovnetOps().GetVfRepresentorDPU(fmt.Sprintf("%d", pfId), fmt.Sprintf("%d", vfId)) +} + +// Take care of alternative names for the netdevName by making sure we +// use the link attribute name as well as handle the case when netdevName +// was renamed to types.K8sMgmtIntfName +func getManagementPortNetDev(netdevName string) (string, error) { + link, err := util.GetNetLinkOps().LinkByName(netdevName) + if err != nil { + if !util.GetNetLinkOps().IsLinkNotFoundError(err) { + return "", fmt.Errorf("failed to lookup %s link: %v", netdevName, err) + } + // this may not the first time invoked on the node after reboot + // netdev may have already been renamed to ovn-k8s-mp0. 
+ link, err = util.GetNetLinkOps().LinkByName(types.K8sMgmtIntfName) if err != nil { - // Device might had been already renamed to types.K8sMgmtIntfName - config.OvnKubeNode.MgmtPortNetdev = types.K8sMgmtIntfName - if deviceID, err = util.GetDeviceIDFromNetdevice(config.OvnKubeNode.MgmtPortNetdev); err != nil { - return nil, nil, fmt.Errorf("failed to get device id for %s or %s: %v", - config.OvnKubeNode.MgmtPortNetdev, types.K8sMgmtIntfName, err) - } + return "", fmt.Errorf("failed to get link device for %s. %v", netdevName, err) } - rep, err := util.GetFunctionRepresentorName(deviceID) + } + + if link.Attrs().Name != netdevName { + klog.Infof("'%v' != '%v' (link.Attrs().Name != netdevName)", link.Attrs().Name, netdevName) + } + return link.Attrs().Name, err +} + +func getMgmtPortAndRepNameModeFull() (string, string, error) { + if config.OvnKubeNode.MgmtPortNetdev == "" { + return "", "", nil + } + netdevName, err := getManagementPortNetDev(config.OvnKubeNode.MgmtPortNetdev) + if err != nil { + return "", "", err + } + deviceID, err := util.GetDeviceIDFromNetdevice(netdevName) + if err != nil { + return "", "", fmt.Errorf("failed to get device id for %s: %v", netdevName, err) + } + rep, err := util.GetFunctionRepresentorName(deviceID) + if err != nil { + return "", "", err + } + return netdevName, rep, err +} + +// In DPU mode, read the annotation from the host side which should have been +// exported by ovn-k running in DPU host mode. +func getMgmtPortAndRepNameModeDPU(node *kapi.Node) (string, string, error) { + rep, err := importManagementPortAnnotation(node) + if err != nil { + return "", "", err + } + return "", rep, err +} + +func getMgmtPortAndRepNameModeDPUHost() (string, string, error) { + netdevName, err := getManagementPortNetDev(config.OvnKubeNode.MgmtPortNetdev) + if err != nil { + return "", "", err + } + return netdevName, "", nil +} + +func getMgmtPortAndRepName(node *kapi.Node) (string, string, error) { + switch config.OvnKubeNode.Mode { + case types.NodeModeFull: + return getMgmtPortAndRepNameModeFull() + case types.NodeModeDPU: + return getMgmtPortAndRepNameModeDPU(node) + case types.NodeModeDPUHost: + return getMgmtPortAndRepNameModeDPUHost() + default: + return "", "", fmt.Errorf("unexpected config.OvnKubeNode.Mode '%v'", config.OvnKubeNode.Mode) + } +} + +func createNodeManagementPorts(node *kapi.Node, nodeAnnotator kube.Annotator, waiter *startupWaiter, + subnets []*net.IPNet, routeManager *routeManager) ([]managementPortEntry, *managementPortConfig, error) { + netdevName, rep, err := getMgmtPortAndRepName(node) + if err != nil { + return nil, nil, err + } + + if config.OvnKubeNode.Mode == types.NodeModeDPUHost { + err := exportManagementPortAnnotation(netdevName, nodeAnnotator) if err != nil { return nil, nil, err } - config.OvnKubeNode.MgmtPortRepresentor = rep } - ports := NewManagementPorts(name, subnets) + ports := NewManagementPorts(node.Name, subnets, netdevName, rep) var mgmtPortConfig *managementPortConfig mgmtPorts := make([]managementPortEntry, 0) @@ -630,7 +725,7 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error { } // Setup management ports - mgmtPorts, mgmtPortConfig, err := createNodeManagementPorts(nc.name, nodeAnnotator, waiter, subnets, nc.routeManager) + mgmtPorts, mgmtPortConfig, err := createNodeManagementPorts(node, nodeAnnotator, waiter, subnets, nc.routeManager) if err != nil { return err } diff --git a/go-controller/pkg/node/gateway_init_linux_test.go b/go-controller/pkg/node/gateway_init_linux_test.go index 
e62ea12e24..0c35898eeb 100644 --- a/go-controller/pkg/node/gateway_init_linux_test.go +++ b/go-controller/pkg/node/gateway_init_linux_test.go @@ -684,7 +684,6 @@ func shareGatewayInterfaceDPUTest(app *cli.App, testNS ns.NetNS, "--nodeport", "--mtu=" + mtu, "--ovnkube-node-mode=" + types.NodeModeDPU, - "--ovnkube-node-mgmt-port-netdev=pf0vf0", }) Expect(err).NotTo(HaveOccurred()) } diff --git a/go-controller/pkg/node/management-port-dpu.go b/go-controller/pkg/node/management-port-dpu.go index 4bb871a565..2ee941f944 100644 --- a/go-controller/pkg/node/management-port-dpu.go +++ b/go-controller/pkg/node/management-port-dpu.go @@ -21,19 +21,11 @@ type managementPortRepresentor struct { } // newManagementPortRepresentor creates a new managementPortRepresentor -func newManagementPortRepresentor(nodeName string, hostSubnets []*net.IPNet) ManagementPort { - var repName string - - // In ovnkube-node mode DPU representor name stored in MgmtPortNetdev variable - if config.OvnKubeNode.MgmtPortRepresentor == "" { - repName = config.OvnKubeNode.MgmtPortNetdev - } else { - repName = config.OvnKubeNode.MgmtPortRepresentor - } +func newManagementPortRepresentor(nodeName string, hostSubnets []*net.IPNet, rep string) ManagementPort { return &managementPortRepresentor{ nodeName: nodeName, hostSubnets: hostSubnets, - repName: repName, + repName: rep, } } @@ -43,19 +35,14 @@ func (mp *managementPortRepresentor) Create(_ *routeManager, nodeAnnotator kube. k8sMgmtIntfName += "_0" } - klog.Infof("Lookup representor link and existing management port") + klog.Infof("Lookup representor link and existing management port for '%v'", mp.repName) // Get management port representor netdevice link, err := util.GetNetLinkOps().LinkByName(mp.repName) if err != nil { - if !util.GetNetLinkOps().IsLinkNotFoundError(err) { - return nil, fmt.Errorf("failed to lookup %s link: %v", mp.repName, err) - } - // It may fail in case this is not the first run after reboot and management port has already been renamed. - link, err = util.GetNetLinkOps().LinkByName(k8sMgmtIntfName) - if err != nil { - return nil, fmt.Errorf("failed to get link device for %s. %v", mp.repName, err) - } - } else if link.Attrs().Name != k8sMgmtIntfName { + return nil, err + } + + if link.Attrs().Name != k8sMgmtIntfName { if err := syncMgmtPortInterface(mp.hostSubnets, k8sMgmtIntfName, false); err != nil { return nil, fmt.Errorf("failed to check existing management port: %v", err) } @@ -172,28 +159,21 @@ type managementPortNetdev struct { } // newManagementPortNetdev creates a new managementPortNetdev -func newManagementPortNetdev(hostSubnets []*net.IPNet) ManagementPort { +func newManagementPortNetdev(hostSubnets []*net.IPNet, netdevName string) ManagementPort { return &managementPortNetdev{ hostSubnets: hostSubnets, - netdevName: config.OvnKubeNode.MgmtPortNetdev, + netdevName: netdevName, } } func (mp *managementPortNetdev) Create(routeManager *routeManager, nodeAnnotator kube.Annotator, waiter *startupWaiter) (*managementPortConfig, error) { - klog.Infof("Lookup netdevice link and existing management port") - // get netdev that is used for management port. 
+ klog.Infof("Lookup netdevice link and existing management port using '%v'", mp.netdevName) link, err := util.GetNetLinkOps().LinkByName(mp.netdevName) if err != nil { - if !util.GetNetLinkOps().IsLinkNotFoundError(err) { - return nil, fmt.Errorf("failed to lookup %s link: %v", mp.netdevName, err) - } - // this may not the first time invoked on the node after reboot - // netdev may have already been renamed to ovn-k8s-mp0. - link, err = util.GetNetLinkOps().LinkByName(types.K8sMgmtIntfName) - if err != nil { - return nil, fmt.Errorf("failed to get link device for %s. %v", mp.netdevName, err) - } - } else if link.Attrs().Name != types.K8sMgmtIntfName { + return nil, err + } + + if link.Attrs().Name != types.K8sMgmtIntfName { err = syncMgmtPortInterface(mp.hostSubnets, types.K8sMgmtIntfName, false) if err != nil { return nil, fmt.Errorf("failed to sync management port: %v", err) diff --git a/go-controller/pkg/node/management-port.go b/go-controller/pkg/node/management-port.go index a5c98aba47..2d17cf57a5 100644 --- a/go-controller/pkg/node/management-port.go +++ b/go-controller/pkg/node/management-port.go @@ -33,7 +33,7 @@ type ManagementPort interface { } // NewManagementPorts creates a new ManagementPorts -func NewManagementPorts(nodeName string, hostSubnets []*net.IPNet) []ManagementPort { +func NewManagementPorts(nodeName string, hostSubnets []*net.IPNet, netdevName, rep string) []ManagementPort { // Kubernetes emits events when pods are created. The event will contain // only lowercase letters of the hostname even though the kubelet is // started with a hostname that contains lowercase and uppercase letters. @@ -46,17 +46,17 @@ func NewManagementPorts(nodeName string, hostSubnets []*net.IPNet) []ManagementP switch config.OvnKubeNode.Mode { case types.NodeModeDPU: - return []ManagementPort{newManagementPortRepresentor(nodeName, hostSubnets)} + return []ManagementPort{newManagementPortRepresentor(nodeName, hostSubnets, rep)} case types.NodeModeDPUHost: - return []ManagementPort{newManagementPortNetdev(hostSubnets)} + return []ManagementPort{newManagementPortNetdev(hostSubnets, netdevName)} default: // create OVS internal port or configure netdevice and its representor if config.OvnKubeNode.MgmtPortNetdev == "" { return []ManagementPort{newManagementPort(nodeName, hostSubnets)} } else { return []ManagementPort{ - newManagementPortNetdev(hostSubnets), - newManagementPortRepresentor(nodeName, hostSubnets), + newManagementPortNetdev(hostSubnets, netdevName), + newManagementPortRepresentor(nodeName, hostSubnets, rep), } } } diff --git a/go-controller/pkg/node/management-port_dpu_test.go b/go-controller/pkg/node/management-port_dpu_test.go index f933327fd9..cebfb1ef72 100644 --- a/go-controller/pkg/node/management-port_dpu_test.go +++ b/go-controller/pkg/node/management-port_dpu_test.go @@ -149,10 +149,7 @@ var _ = Describe("Mananagement port DPU tests", func() { linkMock.On("Attrs").Return(&netlink.LinkAttrs{Name: "ovn-k8s-mp0", MTU: config.Default.MTU}) netlinkOpsMock.On("LinkByName", "enp3s0f0v0").Return( - nil, fmt.Errorf("failed to get link device")) - netlinkOpsMock.On("LinkByName", types.K8sMgmtIntfName).Return( linkMock, nil) - netlinkOpsMock.On("IsLinkNotFoundError", mock.Anything).Return(true) netlinkOpsMock.On("LinkSetUp", linkMock).Return(nil) execMock.AddFakeCmd(&ovntest.ExpectedCmd{ Cmd: genOVSAddMgmtPortCmd(mgmtPortDpu.nodeName, mgmtPortDpu.repName), @@ -246,16 +243,6 @@ var _ = Describe("Mananagement port DPU tests", func() { linkMock.On("Attrs").Return(&netlink.LinkAttrs{ 
Name: "ovn-k8s-mp0", MTU: 1400, HardwareAddr: expectedMgmtPortMac}) - netlinkOpsMock.On("LinkByName", "enp3s0f0v0").Return( - nil, fmt.Errorf("failed to get link")).Once() - netlinkOpsMock.On("LinkByName", types.K8sMgmtIntfName).Return( - linkMock, nil).Once() - netlinkOpsMock.On("LinkSetUp", linkMock).Return(nil, nil).Once() - netlinkOpsMock.On("IsLinkNotFoundError", mock.Anything).Return(true) - execMock.AddFakeCmdsNoOutputNoError([]string{ - "ovs-vsctl --timeout=15 set Open_vSwitch . external-ids:ovn-orig-mgmt-port-netdev-name=" + mgmtPortDpuHost.netdevName, - }) - // mock createPlatformManagementPort, we fail it as it covers what we want to test without the // need to mock the entire flow down to routes and iptable rules. netlinkOpsMock.On("LinkByName", mock.Anything).Return(nil, fmt.Errorf( diff --git a/go-controller/pkg/node/management-port_linux_test.go b/go-controller/pkg/node/management-port_linux_test.go index cdd2d64f4c..fbd729feff 100644 --- a/go-controller/pkg/node/management-port_linux_test.go +++ b/go-controller/pkg/node/management-port_linux_test.go @@ -266,7 +266,6 @@ func testManagementPort(ctx *cli.Context, fexec *ovntest.FakeExec, testNS ns.Net nodeAnnotator := kube.NewNodeAnnotator(&kube.KubeOVN{Kube: kube.Kube{KClient: fakeClient}, EIPClient: egressipv1fake.NewSimpleClientset(), EgressFirewallClient: &egressfirewallfake.Clientset{}, EgressServiceClient: &egressservicefake.Clientset{}}, existingNode.Name) waiter := newStartupWaiter() - mgmtPorts := NewManagementPorts(nodeName, nodeSubnetCIDRs) wg := &sync.WaitGroup{} rm := newRouteManager(wg, true, 10*time.Second) stopCh := make(chan struct{}) @@ -283,6 +282,11 @@ func testManagementPort(ctx *cli.Context, fexec *ovntest.FakeExec, testNS ns.Net }) err = testNS.Do(func(ns.NetNS) error { + defer GinkgoRecover() + + netdevName, rep := "", "" + + mgmtPorts := NewManagementPorts(nodeName, nodeSubnetCIDRs, netdevName, rep) _, err = mgmtPorts[0].Create(rm, nodeAnnotator, waiter) Expect(err).NotTo(HaveOccurred()) checkMgmtTestPortIpsAndRoutes(configs, mgtPort, mgtPortAddrs, expectedLRPMAC) @@ -367,7 +371,10 @@ func testManagementPortDPU(ctx *cli.Context, fexec *ovntest.FakeExec, testNS ns. 
err = testNS.Do(func(ns.NetNS) error { defer GinkgoRecover() - mgmtPorts := NewManagementPorts(nodeName, nodeSubnetCIDRs) + + netdevName, rep := "pf0vf0", "pf0vf0" + + mgmtPorts := NewManagementPorts(nodeName, nodeSubnetCIDRs, netdevName, rep) _, err = mgmtPorts[0].Create(rm, nodeAnnotator, waiter) Expect(err).NotTo(HaveOccurred()) // make sure interface was renamed and mtu was set @@ -454,8 +461,9 @@ func testManagementPortDPUHost(ctx *cli.Context, fexec *ovntest.FakeExec, testNS err = testNS.Do(func(ns.NetNS) error { defer GinkgoRecover() - mgmtPorts := NewManagementPorts(nodeName, nodeSubnetCIDRs) + netdevName, rep := "pf0vf0", "" + mgmtPorts := NewManagementPorts(nodeName, nodeSubnetCIDRs, netdevName, rep) _, err = mgmtPorts[0].Create(rm, nil, nil) Expect(err).NotTo(HaveOccurred()) checkMgmtTestPortIpsAndRoutes(configs, mgtPort, mgtPortAddrs, expectedLRPMAC) @@ -916,7 +924,6 @@ var _ = Describe("Management Port Operations", func() { "--cluster-subnets=" + v4clusterCIDR, "--k8s-service-cidr=" + v4serviceCIDR, "--ovnkube-node-mode=" + types.NodeModeDPU, - "--ovnkube-node-mgmt-port-netdev=" + mgmtPortNetdev, }) Expect(err).NotTo(HaveOccurred()) }) diff --git a/go-controller/pkg/node/management-port_test.go b/go-controller/pkg/node/management-port_test.go index 54e27a0855..5e2ff9896a 100644 --- a/go-controller/pkg/node/management-port_test.go +++ b/go-controller/pkg/node/management-port_test.go @@ -16,36 +16,36 @@ var _ = Describe("Mananagement port tests", func() { Context("NewManagementPort Creates Management port object according to config.OvnKubeNode.Mode", func() { It("Creates managementPort by default", func() { - mgmtPorts := NewManagementPorts("worker-node", nil) + mgmtPorts := NewManagementPorts("worker-node", nil, "", "") Expect(len(mgmtPorts)).To(Equal(1)) Expect(reflect.TypeOf(mgmtPorts[0]).String()).To(Equal(reflect.TypeOf(&managementPort{}).String())) }) It("Creates managementPortRepresentor for Ovnkube Node mode dpu", func() { config.OvnKubeNode.Mode = types.NodeModeDPU - config.OvnKubeNode.MgmtPortNetdev = "ens1f0v0" - mgmtPorts := NewManagementPorts("worker-node", nil) + netdevName, rep := "", "ens1f0v0" + mgmtPorts := NewManagementPorts("worker-node", nil, netdevName, rep) Expect(len(mgmtPorts)).To(Equal(1)) Expect(reflect.TypeOf(mgmtPorts[0]).String()).To(Equal(reflect.TypeOf(&managementPortRepresentor{}).String())) port, _ := mgmtPorts[0].(*managementPortRepresentor) - Expect(port.repName).To(Equal("ens1f0v0")) + Expect(port.repName).To(Equal(rep)) }) It("Creates managementPortNetdev for Ovnkube Node mode dpu-host", func() { config.OvnKubeNode.Mode = types.NodeModeDPUHost - mgmtPorts := NewManagementPorts("worker-node", nil) + mgmtPorts := NewManagementPorts("worker-node", nil, "", "") Expect(len(mgmtPorts)).To(Equal(1)) Expect(reflect.TypeOf(mgmtPorts[0]).String()).To(Equal(reflect.TypeOf(&managementPortNetdev{}).String())) }) It("Creates managementPortNetdev and managementPortRepresentor for Ovnkube Node mode full", func() { config.OvnKubeNode.MgmtPortNetdev = "ens1f0v0" - mgmtPorts := NewManagementPorts("worker-node", nil) + mgmtPorts := NewManagementPorts("worker-node", nil, "", "") Expect(len(mgmtPorts)).To(Equal(2)) Expect(reflect.TypeOf(mgmtPorts[0]).String()).To(Equal(reflect.TypeOf(&managementPortNetdev{}).String())) Expect(reflect.TypeOf(mgmtPorts[1]).String()).To(Equal(reflect.TypeOf(&managementPortRepresentor{}).String())) }) It("Creates managementPortNetdev and managementPortRepresentor with proper device names", func() { - config.OvnKubeNode.MgmtPortNetdev 
= "ens1f0v0" - config.OvnKubeNode.MgmtPortRepresentor = "ens1f0_0" - mgmtPorts := NewManagementPorts("worker-node", nil) + netdevName, rep := "ens1f0v0", "ens1f0_0" + config.OvnKubeNode.MgmtPortNetdev = netdevName + mgmtPorts := NewManagementPorts("worker-node", nil, netdevName, rep) Expect(len(mgmtPorts)).To(Equal(2)) Expect(reflect.TypeOf(mgmtPorts[1]).String()).To(Equal(reflect.TypeOf(&managementPortRepresentor{}).String())) port, _ := mgmtPorts[1].(*managementPortRepresentor) diff --git a/go-controller/pkg/util/node_annotations.go b/go-controller/pkg/util/node_annotations.go index dac7a6c9b6..2eecbb20ce 100644 --- a/go-controller/pkg/util/node_annotations.go +++ b/go-controller/pkg/util/node_annotations.go @@ -54,6 +54,9 @@ const ( // OvnDefaultNetworkGateway captures L3 gateway config for default OVN network interface ovnDefaultNetworkGateway = "default" + // ovnNodeManagementPort is the constant string representing the annotation key + ovnNodeManagementPort = "k8s.ovn.org/node-mgmt-port" + // ovnNodeManagementPortMacAddress is the constant string representing the annotation key ovnNodeManagementPortMacAddress = "k8s.ovn.org/node-mgmt-port-mac-address" @@ -336,6 +339,39 @@ func NodeChassisIDAnnotationChanged(oldNode, newNode *kapi.Node) bool { return oldNode.Annotations[ovnNodeChassisID] != newNode.Annotations[ovnNodeChassisID] } +type ManagementPortDetails struct { + PfId int `json:"PfId"` + FuncId int `json:"FuncId"` +} + +func SetNodeManagementPortAnnotation(nodeAnnotator kube.Annotator, PfId int, FuncId int) error { + mgmtPortDetails := ManagementPortDetails{ + PfId: PfId, + FuncId: FuncId, + } + bytes, err := json.Marshal(mgmtPortDetails) + if err != nil { + return fmt.Errorf("failed to marshal mgmtPortDetails with PfId '%v', FuncId '%v'", PfId, FuncId) + } + return nodeAnnotator.Set(ovnNodeManagementPort, string(bytes)) +} + +// ParseNodeManagementPort returns the parsed host addresses living on a node +func ParseNodeManagementPortAnnotation(node *kapi.Node) (int, int, error) { + mgmtPortAnnotation, ok := node.Annotations[ovnNodeManagementPort] + if !ok { + return -1, -1, newAnnotationNotSetError("%s annotation not found for node %q", ovnNodeManagementPort, node.Name) + } + + cfg := ManagementPortDetails{} + if err := json.Unmarshal([]byte(mgmtPortAnnotation), &cfg); err != nil { + return -1, -1, fmt.Errorf("failed to unmarshal management port annotation %s for node %q: %v", + mgmtPortAnnotation, node.Name, err) + } + + return cfg.PfId, cfg.FuncId, nil +} + func SetNodeManagementPortMACAddress(nodeAnnotator kube.Annotator, macAddress net.HardwareAddr) error { return nodeAnnotator.Set(ovnNodeManagementPortMacAddress, macAddress.String()) } From aa454fa7ecac3216a1eb001ab36facd9d846aa68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Wed, 10 May 2023 08:16:04 +0000 Subject: [PATCH 47/90] Egress IP, Services: use all node IP addresses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Match on all node's IP addresses on the no re-route policies so that intra cluster traffic is not routed through the egress or service IP even when the destination IP address is a secondary node IP. Secondary node IPs might be VIPs that move across nodes. 
Signed-off-by: Jaime CaamaƱo Ruiz --- .../egress_services_controller.go | 35 ++-- .../egress_services/egress_services_node.go | 32 ++- .../pkg/ovn/default_network_controller.go | 110 +--------- go-controller/pkg/ovn/egressip.go | 196 +++++++++++------- go-controller/pkg/ovn/egressip_test.go | 139 +++++++++++++ go-controller/pkg/ovn/egressservices_test.go | 33 ++- go-controller/pkg/ovn/ovn.go | 11 +- go-controller/pkg/util/util.go | 28 +++ test/e2e/egress_services.go | 24 ++- test/e2e/egressip.go | 129 ++++++++---- 10 files changed, 469 insertions(+), 268 deletions(-) diff --git a/go-controller/pkg/ovn/controller/egress_services/egress_services_controller.go b/go-controller/pkg/ovn/controller/egress_services/egress_services_controller.go index e531ec4ab9..d5ea94e4b7 100644 --- a/go-controller/pkg/ovn/controller/egress_services/egress_services_controller.go +++ b/go-controller/pkg/ovn/controller/egress_services/egress_services_controller.go @@ -47,10 +47,8 @@ const ( type InitClusterEgressPoliciesFunc func(client libovsdbclient.Client, addressSetFactory addressset.AddressSetFactory, controllerName string) error -type CreateNoRerouteNodePoliciesFunc func(client libovsdbclient.Client, addressSetFactory addressset.AddressSetFactory, - node *corev1.Node, controllerName string) error -type DeleteNoRerouteNodePoliciesFunc func(addressSetFactory addressset.AddressSetFactory, nodeName string, - v4NodeAddr, v6NodeAddr net.IP, controllerName string) error +type EnsureNoRerouteNodePoliciesFunc func(client libovsdbclient.Client, addressSetFactory addressset.AddressSetFactory, + controllerName string, nodeLister corelisters.NodeLister) error type DeleteLegacyDefaultNoRerouteNodePoliciesFunc func(libovsdbclient.Client, string) error type Controller struct { @@ -61,8 +59,7 @@ type Controller struct { sync.Mutex initClusterEgressPolicies InitClusterEgressPoliciesFunc - createNoRerouteNodePolicies CreateNoRerouteNodePoliciesFunc - deleteNoRerouteNodePolicies DeleteNoRerouteNodePoliciesFunc + ensureNoRerouteNodePolicies EnsureNoRerouteNodePoliciesFunc deleteLegacyDefaultNoRerouteNodePolicies DeleteLegacyDefaultNoRerouteNodePoliciesFunc IsReachable func(nodeName string, mgmtIPs []net.IP, healthClient healthcheck.EgressIPHealthClient) bool // TODO: make a universal cache instead setEgressServiceStatus func(ns, name, host string) error @@ -104,17 +101,15 @@ type svcState struct { } type nodeState struct { - name string - labels map[string]string - mgmtIPs []net.IP - v4MgmtIP net.IP - v6MgmtIP net.IP - v4InternalNodeIP net.IP - v6InternalNodeIP net.IP - healthClient healthcheck.EgressIPHealthClient - allocations map[string]*svcState // svc key -> state - reachable bool - draining bool + name string + labels map[string]string + mgmtIPs []net.IP + v4MgmtIP net.IP + v6MgmtIP net.IP + healthClient healthcheck.EgressIPHealthClient + allocations map[string]*svcState // svc key -> state + reachable bool + draining bool } func NewController( @@ -123,8 +118,7 @@ func NewController( nbClient libovsdbclient.Client, addressSetFactory addressset.AddressSetFactory, initClusterEgressPolicies InitClusterEgressPoliciesFunc, - createNoRerouteNodePolicies CreateNoRerouteNodePoliciesFunc, - deleteNoRerouteNodePolicies DeleteNoRerouteNodePoliciesFunc, + ensureNoRerouteNodePolicies EnsureNoRerouteNodePoliciesFunc, deleteLegacyDefaultNoRerouteNodePolicies DeleteLegacyDefaultNoRerouteNodePoliciesFunc, setEgressServiceStatus func(ns, name, host string) error, isReachable func(nodeName string, mgmtIPs []net.IP, healthClient 
healthcheck.EgressIPHealthClient) bool, @@ -141,8 +135,7 @@ func NewController( nbClient: nbClient, addressSetFactory: addressSetFactory, initClusterEgressPolicies: initClusterEgressPolicies, - createNoRerouteNodePolicies: createNoRerouteNodePolicies, - deleteNoRerouteNodePolicies: deleteNoRerouteNodePolicies, + ensureNoRerouteNodePolicies: ensureNoRerouteNodePolicies, deleteLegacyDefaultNoRerouteNodePolicies: deleteLegacyDefaultNoRerouteNodePolicies, IsReachable: isReachable, setEgressServiceStatus: setEgressServiceStatus, diff --git a/go-controller/pkg/ovn/controller/egress_services/egress_services_node.go b/go-controller/pkg/ovn/controller/egress_services/egress_services_node.go index 309b08cb1b..439a770b59 100644 --- a/go-controller/pkg/ovn/controller/egress_services/egress_services_node.go +++ b/go-controller/pkg/ovn/controller/egress_services/egress_services_node.go @@ -104,9 +104,11 @@ func (c *Controller) onNodeUpdate(oldObj, newObj interface{}) { oldNodeReady := nodeIsReady(oldNode) newNodeReady := nodeIsReady(newNode) - // We only care about node updates that relate to readiness or label changes + // We only care about node updates that relate to readiness, labels or + // addresses if labels.Equals(oldNodeLabels, newNodeLabels) && - oldNodeReady == newNodeReady { + oldNodeReady == newNodeReady && + !util.NodeHostAddressesAnnotationChanged(oldNode, newNode) { return } @@ -177,6 +179,13 @@ func (c *Controller) syncNode(key string) error { return err } + // We ensure node no re-route policies contemplating possible node IP + // address changes regardless of allocated services. + err = c.ensureNoRerouteNodePolicies(c.nbClient, c.addressSetFactory, c.controllerName, c.nodeLister) + if err != nil { + return err + } + n, err := c.nodeLister.Get(nodeName) if err != nil && !apierrors.IsNotFound(err) { return err @@ -198,21 +207,10 @@ func (c *Controller) syncNode(key string) error { } delete(c.nodes, nodeName) state.healthClient.Disconnect() - } else { - // we don't have a node at this point (node deleted?) and we don't have its cache - // entry (state==nil) as well. Maybe state was deleted when node became nodeReady or unreachable - // nothing to sync here - return nil } - return c.deleteNoRerouteNodePolicies(c.addressSetFactory, nodeName, state.v4InternalNodeIP, - state.v6InternalNodeIP, c.controllerName) - } - - // We create the per-node reroute policies as long as it has a resource (n != nil at this point), - // regardless if it was allocated services or not. 
- if err := c.createNoRerouteNodePolicies(c.nbClient, c.addressSetFactory, n, c.controllerName); err != nil { - return err + // nothing to sync here + return nil } nodeReady := nodeIsReady(n) @@ -318,9 +316,7 @@ func (c *Controller) nodeStateFor(name string) (*nodeState, error) { v6IP = ip } - v4NodeAddr, v6NodeAddr := util.GetNodeInternalAddrs(node) - - return &nodeState{name: name, mgmtIPs: mgmtIPs, v4MgmtIP: v4IP, v6MgmtIP: v6IP, v4InternalNodeIP: v4NodeAddr, v6InternalNodeIP: v6NodeAddr, + return &nodeState{name: name, mgmtIPs: mgmtIPs, v4MgmtIP: v4IP, v6MgmtIP: v6IP, healthClient: healthcheck.NewEgressIPHealthClient(name), allocations: map[string]*svcState{}, labels: node.Labels, reachable: true, draining: false}, nil } diff --git a/go-controller/pkg/ovn/default_network_controller.go b/go-controller/pkg/ovn/default_network_controller.go index 4533c1368b..a77dc7f008 100644 --- a/go-controller/pkg/ovn/default_network_controller.go +++ b/go-controller/pkg/ovn/default_network_controller.go @@ -112,8 +112,6 @@ type DefaultNetworkController struct { retryEgressNodes *retry.RetryFramework // retry framework for Egress Firewall Nodes retryEgressFwNodes *retry.RetryFramework - // EgressIP Node-specific syncMap used by egressip node event handler - addEgressNodeFailed sync.Map // Node-specific syncMaps used by node event handler gatewaysFailed sync.Map @@ -192,6 +190,7 @@ func newDefaultNetworkControllerCommon(cnci *CommonNetworkControllerInfo, eIPC: egressIPController{ egressIPAssignmentMutex: &sync.Mutex{}, podAssignmentMutex: &sync.Mutex{}, + nodeIPUpdateMutex: &sync.Mutex{}, podAssignment: make(map[string]*podAssignmentState), pendingCloudPrivateIPConfigsMutex: &sync.Mutex{}, pendingCloudPrivateIPConfigsOps: make(map[string]map[string]*cloudPrivateIPConfigOp), @@ -773,26 +772,7 @@ func (h *defaultNetworkControllerEventHandler) AddResource(obj interface{}, from case factory.EgressNodeType: node := obj.(*kapi.Node) - if err := h.oc.setupNodeForEgress(node); err != nil { - return err - } - nodeEgressLabel := util.GetNodeEgressLabel() - nodeLabels := node.GetLabels() - _, hasEgressLabel := nodeLabels[nodeEgressLabel] - if hasEgressLabel { - h.oc.setNodeEgressAssignable(node.Name, true) - } - isReady := h.oc.isEgressNodeReady(node) - if isReady { - h.oc.setNodeEgressReady(node.Name, true) - } - isReachable := h.oc.isEgressNodeReachable(node) - if hasEgressLabel && isReachable && isReady { - h.oc.setNodeEgressReachable(node.Name, true) - if err := h.oc.addEgressNode(node.Name); err != nil { - return err - } - } + return h.oc.reconcileNodeForEgressIP(nil, node) case factory.EgressFwNodeType: node := obj.(*kapi.Node) @@ -916,79 +896,7 @@ func (h *defaultNetworkControllerEventHandler) UpdateResource(oldObj, newObj int case factory.EgressNodeType: oldNode := oldObj.(*kapi.Node) newNode := newObj.(*kapi.Node) - - // Check if the node's internal addresses changed. If so, - // delete and readd the node for egress to update LR policies. - // We are only interested in the IPs here, not the subnet information. - oldV4Addr, oldV6Addr := util.GetNodeInternalAddrs(oldNode) - newV4Addr, newV6Addr := util.GetNodeInternalAddrs(newNode) - if !oldV4Addr.Equal(newV4Addr) || !oldV6Addr.Equal(newV6Addr) { - klog.Infof("Egress IP detected IP address change. 
Recreating node %s for Egress IP.", newNode.Name) - if err := h.oc.deleteNodeForEgress(oldNode); err != nil { - klog.Error(err) - } - if err := h.oc.setupNodeForEgress(newNode); err != nil { - klog.Error(err) - } - } - - // Initialize the allocator on every update, - // ovnkube-node/cloud-network-config-controller will make sure to - // annotate the node with the egressIPConfig, but that might have - // happened after we processed the ADD for that object, hence keep - // retrying for all UPDATEs. - if err := h.oc.initEgressIPAllocator(newNode); err != nil { - klog.Warningf("Egress node initialization error: %v", err) - } - nodeEgressLabel := util.GetNodeEgressLabel() - oldLabels := oldNode.GetLabels() - newLabels := newNode.GetLabels() - _, oldHadEgressLabel := oldLabels[nodeEgressLabel] - _, newHasEgressLabel := newLabels[nodeEgressLabel] - // If the node is not labeled for egress assignment, just return - // directly, we don't really need to set the ready / reachable - // status on this node if the user doesn't care about using it. - if !oldHadEgressLabel && !newHasEgressLabel { - return nil - } - h.oc.setNodeEgressAssignable(newNode.Name, newHasEgressLabel) - if oldHadEgressLabel && !newHasEgressLabel { - klog.Infof("Node: %s has been un-labeled, deleting it from egress assignment", newNode.Name) - return h.oc.deleteEgressNode(oldNode.Name) - } - isOldReady := h.oc.isEgressNodeReady(oldNode) - isNewReady := h.oc.isEgressNodeReady(newNode) - isNewReachable := h.oc.isEgressNodeReachable(newNode) - h.oc.setNodeEgressReady(newNode.Name, isNewReady) - if !oldHadEgressLabel && newHasEgressLabel { - klog.Infof("Node: %s has been labeled, adding it for egress assignment", newNode.Name) - if isNewReady && isNewReachable { - h.oc.setNodeEgressReachable(newNode.Name, isNewReachable) - if err := h.oc.addEgressNode(newNode.Name); err != nil { - return err - } - } else { - klog.Warningf("Node: %s has been labeled, but node is not ready"+ - " and reachable, cannot use it for egress assignment", newNode.Name) - } - return nil - } - if isOldReady == isNewReady { - return nil - } - if !isNewReady { - klog.Warningf("Node: %s is not ready, deleting it from egress assignment", newNode.Name) - if err := h.oc.deleteEgressNode(newNode.Name); err != nil { - return err - } - } else if isNewReady && isNewReachable { - klog.Infof("Node: %s is ready and reachable, adding it for egress assignment", newNode.Name) - h.oc.setNodeEgressReachable(newNode.Name, isNewReachable) - if err := h.oc.addEgressNode(newNode.Name); err != nil { - return err - } - } - return nil + return h.oc.reconcileNodeForEgressIP(oldNode, newNode) case factory.EgressFwNodeType: oldNode := oldObj.(*kapi.Node) @@ -1058,17 +966,7 @@ func (h *defaultNetworkControllerEventHandler) DeleteResource(obj, cachedObj int case factory.EgressNodeType: node := obj.(*kapi.Node) - if err := h.oc.deleteNodeForEgress(node); err != nil { - return err - } - nodeEgressLabel := util.GetNodeEgressLabel() - nodeLabels := node.GetLabels() - if _, hasEgressLabel := nodeLabels[nodeEgressLabel]; hasEgressLabel { - if err := h.oc.deleteEgressNode(node.Name); err != nil { - return err - } - } - return nil + return h.oc.reconcileNodeForEgressIP(node, nil) case factory.EgressFwNodeType: node, ok := obj.(*kapi.Node) diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index 99c7232c3a..ec43526802 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -37,6 +37,7 @@ import ( "k8s.io/apimachinery/pkg/labels" 
utilerrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/apimachinery/pkg/util/sets" + listers "k8s.io/client-go/listers/core/v1" "k8s.io/client-go/util/retry" "k8s.io/klog/v2" utilnet "k8s.io/utils/net" @@ -1904,13 +1905,6 @@ func (oc *DefaultNetworkController) setNodeEgressReachable(nodeName string, isRe func (oc *DefaultNetworkController) addEgressNode(nodeName string) error { var errors []error - // Check if EgressIP node create failed and if does try adding it again - if node, ok := oc.addEgressNodeFailed.Load(nodeName); ok { - failedNode := node.(*kapi.Node) - if err := oc.setupNodeForEgress(failedNode); err != nil { - return err - } - } klog.V(5).Infof("Egress node: %s about to be initialized", nodeName) // This option will program OVN to start sending GARPs for all external IPS // that the logical switch port has been configured to use. This is @@ -2045,28 +2039,101 @@ func (oc *DefaultNetworkController) initEgressIPAllocator(node *kapi.Node) (err return nil } -// setupNodeForEgress sets up default logical router policy for every node and -// initiates the allocator cache for the node in question, if the node has the -// necessary annotation. -func (oc *DefaultNetworkController) setupNodeForEgress(node *v1.Node) error { - if err := CreateDefaultNoRerouteNodePolicies(oc.nbClient, oc.addressSetFactory, node, oc.controllerName); err != nil { - oc.addEgressNodeFailed.Store(node.Name, node) - return err +// reconcileNodeForEgressIP with respect and old and new status of a node +func (oc *DefaultNetworkController) reconcileNodeForEgressIP(oldNode, newNode *v1.Node) error { + // Check if the node's addresses changed. If so, update LR policies. + if oldNode == nil || newNode == nil || util.NodeHostAddressesAnnotationChanged(oldNode, newNode) { + klog.Infof("Egress IP detected IP address change. Updating no re-route policies") + err := oc.ensureDefaultNoRerouteNodePolicies() + if err != nil { + return err + } + } + + nodeEgressLabel := util.GetNodeEgressLabel() + var oldLabels map[string]string + var newLabels map[string]string + var isOldReady, isNewReady, isNewReachable bool + var nodeName string + if oldNode != nil { + oldLabels = oldNode.GetLabels() + isOldReady = oc.isEgressNodeReady(oldNode) + nodeName = oldNode.Name + } + if newNode != nil { + // Initialize the allocator on every update, + // ovnkube-node/cloud-network-config-controller will make sure to + // annotate the node with the egressIPConfig, but that might have + // happened after we processed the ADD for that object, hence keep + // retrying for all UPDATEs. + if err := oc.initEgressIPAllocator(newNode); err != nil { + klog.Warningf("Egress node initialization error: %v", err) + } + + newLabels = newNode.GetLabels() + isNewReady = oc.isEgressNodeReady(newNode) + isNewReachable = oc.isEgressNodeReachable(newNode) + nodeName = newNode.Name + } else if oldNode != nil { + err := oc.deleteEgressIPAllocator(oldNode) + if err != nil { + return nil + } + } + + _, oldHadEgressLabel := oldLabels[nodeEgressLabel] + _, newHasEgressLabel := newLabels[nodeEgressLabel] + oc.setNodeEgressAssignable(nodeName, newHasEgressLabel) + oc.setNodeEgressReady(nodeName, isNewReady) + + // If the node is not labeled for egress assignment, just return + // directly, we don't really need to set the ready / reachable + // status on this node if the user doesn't care about using it. 
+ if !oldHadEgressLabel && !newHasEgressLabel { + return nil + } + + if oldHadEgressLabel && !newHasEgressLabel { + klog.Infof("Node: %s has been un-labeled, deleting it from egress assignment", nodeName) + return oc.deleteEgressNode(nodeName) + } + + if !oldHadEgressLabel && newHasEgressLabel { + klog.Infof("Node: %s has been labeled, adding it for egress assignment", nodeName) + if isNewReady && isNewReachable { + oc.setNodeEgressReachable(nodeName, isNewReachable) + if err := oc.addEgressNode(nodeName); err != nil { + return err + } + } else { + klog.Warningf("Node: %s has been labeled, but node is not ready"+ + " and reachable, cannot use it for egress assignment", nodeName) + } + return nil } - oc.addEgressNodeFailed.Delete(node.Name) - if err := oc.initEgressIPAllocator(node); err != nil { - klog.V(5).Infof("Egress node initialization error: %v", err) + + if isOldReady == isNewReady { + return nil + } + + if !isNewReady { + klog.Warningf("Node: %s is not ready, deleting it from egress assignment", nodeName) + if err := oc.deleteEgressNode(nodeName); err != nil { + return err + } + } else if isNewReady && isNewReachable { + klog.Infof("Node: %s is ready and reachable, adding it for egress assignment", nodeName) + oc.setNodeEgressReachable(nodeName, isNewReachable) + if err := oc.addEgressNode(nodeName); err != nil { + return err + } } + return nil } -// deleteNodeForEgress remove the default allow logical router policies for the -// node and removes the node from the allocator cache. -func (oc *DefaultNetworkController) deleteNodeForEgress(node *v1.Node) error { - v4NodeAddr, v6NodeAddr := util.GetNodeInternalAddrs(node) - if err := DeleteDefaultNoRerouteNodePolicies(oc.addressSetFactory, node.Name, v4NodeAddr, v6NodeAddr, oc.controllerName); err != nil { - return err - } +// deleteEgressIPAllocator removes the node from the allocator cache. +func (oc *DefaultNetworkController) deleteEgressIPAllocator(node *v1.Node) error { oc.eIPC.allocator.Lock() if eNode, exists := oc.eIPC.allocator.cache[node.Name]; exists { eNode.healthClient.Disconnect() @@ -2200,6 +2267,9 @@ type egressIPController struct { // Currently WatchEgressIP, WatchEgressNamespace and WatchEgressPod could // all access that map simultaneously, hence why this guard is needed. podAssignmentMutex *sync.Mutex + // nodeIPUpdateMutex is used to ensure safe handling of node ip address + // updates. VIP addresses are dynamic and might move across nodes. + nodeIPUpdateMutex *sync.Mutex // podAssignment is a cache used for keeping track of which egressIP status // has been setup for each pod. The key is defined by getPodKey podAssignment map[string]*podAssignmentState @@ -2796,32 +2866,45 @@ func createDefaultNoReroutePodPolicies(nbClient libovsdbclient.Client, v4Cluster return nil } -// createDefaultNoRerouteNodePolicies ensures egress pods east<->west traffic with hostNetwork pods, +func (oc *DefaultNetworkController) ensureDefaultNoRerouteNodePolicies() error { + oc.eIPC.nodeIPUpdateMutex.Lock() + defer oc.eIPC.nodeIPUpdateMutex.Unlock() + nodeLister := listers.NewNodeLister(oc.watchFactory.NodeInformer().GetIndexer()) + return ensureDefaultNoRerouteNodePolicies(oc.nbClient, oc.addressSetFactory, oc.controllerName, nodeLister) +} + +// ensureDefaultNoRerouteNodePolicies ensures egress pods east<->west traffic with hostNetwork pods, // i.e: ensuring that an egress pod can still communicate with a hostNetwork pod / service backed by hostNetwork pods // without using egressIPs. 
// sample: 101 ip4.src == $a12749576804119081385 && ip4.dst == $a11079093880111560446 allow pkt_mark=1008 -func CreateDefaultNoRerouteNodePolicies(nbClient libovsdbclient.Client, addressSetFactory addressset.AddressSetFactory, node *kapi.Node, controllerName string) error { - v4NodeAddr, v6NodeAddr := util.GetNodeInternalAddrs(node) +// All the cluster node's addresses are considered. This is to avoid race conditions after a VIP moves from one node +// to another where we might process events out of order. For the same reason this function needs to be called under +// lock. +func ensureDefaultNoRerouteNodePolicies(nbClient libovsdbclient.Client, addressSetFactory addressset.AddressSetFactory, controllerName string, nodeLister listers.NodeLister) error { + nodes, err := nodeLister.List(labels.Everything()) + if err != nil { + return err + } + + v4NodeAddrs, v6NodeAddrs, err := util.GetNodeAddresses(config.IPv4Mode, config.IPv6Mode, nodes...) + if err != nil { + return err + } + + allAddresses := make([]net.IP, 0, len(v4NodeAddrs)+len(v6NodeAddrs)) + allAddresses = append(allAddresses, v4NodeAddrs...) + allAddresses = append(allAddresses, v6NodeAddrs...) + var as addressset.AddressSet - var err error dbIDs := getEgressIPAddrSetDbIDs(NodeIPAddrSetName, controllerName) if as, err = addressSetFactory.GetAddressSet(dbIDs); err != nil { return fmt.Errorf("cannot ensure that addressSet %s exists %v", NodeIPAddrSetName, err) } - if v4NodeAddr != nil { - // add the nodeIP to the nodeIP address-set - if err = as.AddIPs([]net.IP{v4NodeAddr}); err != nil { - return fmt.Errorf("unable to add nodeIPs %s/%s for node %s: to the address set %s, err: %v", - v4NodeAddr.String(), v6NodeAddr.String(), node.Name, NodeIPAddrSetName, err) - } - } - if v6NodeAddr != nil { - // add the nodeIP to the nodeIP address-set - if err = as.AddIPs([]net.IP{v6NodeAddr}); err != nil { - return fmt.Errorf("unable to add nodeIPs %s/%s for node %s: to the address set %s, err: %v", - v4NodeAddr.String(), v6NodeAddr.String(), node.Name, NodeIPAddrSetName, err) - } + + if err = as.SetIPs(allAddresses); err != nil { + return fmt.Errorf("unable to set IPs to no re-route address set %s: %w", NodeIPAddrSetName, err) } + ipv4ClusterNodeIPAS, ipv6ClusterNodeIPAS := as.GetASHashNames() // fetch the egressIP pods address-set dbIDs = getEgressIPAddrSetDbIDs(EgressIPServedPodsAddrSetName, controllerName) @@ -2839,11 +2922,11 @@ func CreateDefaultNoRerouteNodePolicies(nbClient libovsdbclient.Client, addressS var matchV4, matchV6 string // construct the policy match - if v4NodeAddr != nil { + if len(v4NodeAddrs) > 0 { matchV4 = fmt.Sprintf(`(ip4.src == $%s || ip4.src == $%s) && ip4.dst == $%s`, ipv4EgressIPServedPodsAS, ipv4EgressServiceServedPodsAS, ipv4ClusterNodeIPAS) } - if v6NodeAddr != nil { + if len(v6NodeAddrs) > 0 { matchV6 = fmt.Sprintf(`(ip6.src == $%s || ip6.src == $%s) && ip6.dst == $%s`, ipv6EgressIPServedPodsAS, ipv6EgressServiceServedPodsAS, ipv6ClusterNodeIPAS) } @@ -2863,33 +2946,6 @@ func CreateDefaultNoRerouteNodePolicies(nbClient libovsdbclient.Client, addressS return nil } -// DeleteDefaultNoRerouteNodePolicies deletes the EIP node IP from the global node address-set -// NOTE: We haven't added logic to fully delete the policy because there can never be a cluster with 0 nodes -// So once created this policy will exist forever -func DeleteDefaultNoRerouteNodePolicies(addressSetFactory addressset.AddressSetFactory, nodeName string, v4NodeAddr, v6NodeAddr net.IP, controllerName string) error { - var as addressset.AddressSet - 
var err error - dbIDs := getEgressIPAddrSetDbIDs(NodeIPAddrSetName, controllerName) - if as, err = addressSetFactory.GetAddressSet(dbIDs); err != nil { - return fmt.Errorf("cannot ensure that addressSet %s exists %v", NodeIPAddrSetName, err) - } - if v4NodeAddr != nil { - // remove the nodeIP from the nodeIP address-set - if err = as.DeleteIPs([]net.IP{v4NodeAddr}); err != nil { - return fmt.Errorf("unable to delete nodeIPs %s/%s for node %s: to the address set %s, err: %v", - v4NodeAddr.String(), v6NodeAddr.String(), nodeName, NodeIPAddrSetName, err) - } - } - if v6NodeAddr != nil { - // remove the nodeIP from the nodeIP address-set - if err = as.DeleteIPs([]net.IP{v6NodeAddr}); err != nil { - return fmt.Errorf("unable to delete nodeIPs %s/%s for node %s: to the address set %s, err: %v", - v4NodeAddr.String(), v6NodeAddr.String(), nodeName, NodeIPAddrSetName, err) - } - } - return nil -} - func createLogicalRouterPolicy(nbClient libovsdbclient.Client, match string, priority int, externalIDs, options map[string]string) error { lrp := nbdb.LogicalRouterPolicy{ Priority: priority, diff --git a/go-controller/pkg/ovn/egressip_test.go b/go-controller/pkg/ovn/egressip_test.go index 991400d653..57a6cb03c2 100644 --- a/go-controller/pkg/ovn/egressip_test.go +++ b/go-controller/pkg/ovn/egressip_test.go @@ -11,6 +11,8 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" egressipv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" + addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" + egresssvc "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/egress_services" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/healthcheck" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/retry" ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" @@ -1063,6 +1065,143 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { err := app.Run([]string{app.Name}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) + + ginkgo.It("should update node no-reroute policy address set", func() { + app.Action = func(ctx *cli.Context) error { + + config.IPv6Mode = true + node1IPv4 := "192.168.126.202" + node1IPv6 := "fc00:f853:ccd:e793::1" + node2IPv4 := "192.168.126.51" + node2IPv6 := "fc00:f853:ccd:e793::2" + vipIPv4 := "192.168.126.10" + vipIPv6 := "fc00:f853:ccd:e793::10" + + node1 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node1Name, + Annotations: map[string]string{ + "k8s.ovn.org/host-addresses": fmt.Sprintf("[\"%s\", \"%s\", \"%s\", \"%s\"]", node1IPv4, node1IPv6, vipIPv4, vipIPv6), + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + node2 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node2Name, + Annotations: map[string]string{ + "k8s.ovn.org/host-addresses": fmt.Sprintf("[\"%s\", \"%s\"]", node2IPv4, node2IPv6), + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + }, + }, + &v1.NodeList{ + Items: []v1.Node{node1, node2}, + }, + ) + + err := fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + nodeIPsASdbIDs := 
getEgressIPAddrSetDbIDs(NodeIPAddrSetName, DefaultNetworkControllerName) + fakeOvn.asf.EventuallyExpectAddressSetWithIPs(nodeIPsASdbIDs, []string{node1IPv4, node2IPv4, node1IPv6, node2IPv6, vipIPv4, vipIPv6}) + + egressSvcPodsV4, egressSvcPodsV6 := addressset.GetHashNamesForAS(egresssvc.GetEgressServiceAddrSetDbIDs(DefaultNetworkControllerName)) + egressipPodsV4, egressipPodsV6 := addressset.GetHashNamesForAS(getEgressIPAddrSetDbIDs(EgressIPServedPodsAddrSetName, DefaultNetworkControllerName)) + nodeIPsV4, nodeIPsV6 := addressset.GetHashNamesForAS(nodeIPsASdbIDs) + expectedDatabaseState := []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "default-no-reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("(ip4.src == $%s || ip4.src == $%s) && ip4.dst == $%s", + egressipPodsV4, egressSvcPodsV4, nodeIPsV4), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "default-no-reroute-node-UUID", + Options: map[string]string{"pkt_mark": "1008"}, + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("(ip6.src == $%s || ip6.src == $%s) && ip6.dst == $%s", + egressipPodsV6, egressSvcPodsV6, nodeIPsV6), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "default-v6-no-reroute-node-UUID", + Options: map[string]string{"pkt_mark": "1008"}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{ + "default-no-reroute-UUID", + "no-reroute-service-UUID", + "default-no-reroute-node-UUID", + "default-v6-no-reroute-node-UUID", + }, + }, + } + + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + node1.ObjectMeta.Annotations["k8s.ovn.org/host-addresses"] = fmt.Sprintf("[\"%s\", \"%s\"]", node1IPv4, node1IPv6) + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeOvn.asf.EventuallyExpectAddressSetWithIPs(nodeIPsASdbIDs, []string{node1IPv4, node2IPv4, node1IPv6, node2IPv6}) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + node2.ObjectMeta.Annotations["k8s.ovn.org/host-addresses"] = fmt.Sprintf("[\"%s\", \"%s\", \"%s\", \"%s\"]", node2IPv4, node2IPv6, vipIPv4, vipIPv6) + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeOvn.asf.EventuallyExpectAddressSetWithIPs(nodeIPsASdbIDs, []string{node1IPv4, node2IPv4, node1IPv6, node2IPv6, vipIPv4, vipIPv6}) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Delete(context.TODO(), node1.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeOvn.asf.EventuallyExpectAddressSetWithIPs(nodeIPsASdbIDs, []string{node2IPv4, node2IPv6, vipIPv4, vipIPv6}) + 
gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) }) ginkgo.Context("On node DELETE", func() { diff --git a/go-controller/pkg/ovn/egressservices_test.go b/go-controller/pkg/ovn/egressservices_test.go index dffda0504e..a04d70e9e5 100644 --- a/go-controller/pkg/ovn/egressservices_test.go +++ b/go-controller/pkg/ovn/egressservices_test.go @@ -43,6 +43,8 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { node2IPv6 string = "fc00:f853:ccd:e793::2" node2IPv4Subnet string = "10.128.2.0/24" node2IPv6Subnet string = "fe00:10:128:2::/64" + vipIPv4 string = "192.168.126.10" + vipIPv6 string = "fc00:f853:ccd:e793::10" controllerName = DefaultNetworkControllerName egressSVCLabelPrefix string = "egress-service.k8s.ovn.org" ) @@ -1524,7 +1526,7 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { }) ginkgo.Context("on nodes changes", func() { - ginkgo.It("should create/update/delete logical router policies, labels and status", func() { + ginkgo.It("should create/update/delete logical router policies, address sets, labels and status", func() { app.Action = func(ctx *cli.Context) error { namespaceT := *newNamespace("testns") config.IPv6Mode = true @@ -1818,6 +1820,33 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { } gomega.Eventually(fakeOVN.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + ginkgo.By("updating the second node host addresses the node ip no re-route address set will be updated") + nodeIPsASdbIDs := getEgressIPAddrSetDbIDs(NodeIPAddrSetName, DefaultNetworkControllerName) + fakeOVN.asf.EventuallyExpectAddressSetWithIPs(nodeIPsASdbIDs, []string{node1IPv4, node2IPv4, node1IPv6, node2IPv6}) + + node2.ObjectMeta.Annotations["k8s.ovn.org/host-addresses"] = fmt.Sprintf("[\"%s\", \"%s\", \"%s\", \"%s\"]", node2IPv4, node2IPv6, vipIPv4, vipIPv6) + node2.ResourceVersion = "3" + _, err = fakeOVN.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), node2, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeOVN.asf.EventuallyExpectAddressSetWithIPs(nodeIPsASdbIDs, []string{node1IPv4, node2IPv4, node1IPv6, node2IPv6, vipIPv4, vipIPv6}) + gomega.Eventually(fakeOVN.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + node2.ObjectMeta.Annotations["k8s.ovn.org/host-addresses"] = fmt.Sprintf("[\"%s\", \"%s\"]", node2IPv4, node2IPv6) + node2.ResourceVersion = "4" + _, err = fakeOVN.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), node2, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeOVN.asf.EventuallyExpectAddressSetWithIPs(nodeIPsASdbIDs, []string{node1IPv4, node2IPv4, node1IPv6, node2IPv6}) + gomega.Eventually(fakeOVN.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + ginkgo.By("deleting the first node, the node ip no re-route address set will be updated") + err = fakeOVN.fakeClient.KubeClient.CoreV1().Nodes().Delete(context.TODO(), node1.Name, metav1.DeleteOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + fakeOVN.asf.EventuallyExpectAddressSetWithIPs(nodeIPsASdbIDs, []string{node2IPv4, node2IPv6}) + gomega.Eventually(fakeOVN.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + ginkgo.By("deleting the second node the second service's resources will be deleted") err = fakeOVN.fakeClient.KubeClient.CoreV1().Nodes().Delete(context.TODO(), node2.Name, 
metav1.DeleteOptions{}) gomega.Expect(err).ToNot(gomega.HaveOccurred()) @@ -1831,6 +1860,7 @@ var _ = ginkgo.Describe("OVN Egress Service Operations", func() { clusterRouter.Policies = append(clusterRouter.Policies, lrp.UUID) } gomega.Eventually(fakeOVN.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + fakeOVN.asf.EventuallyExpectEmptyAddressSetExist(nodeIPsASdbIDs) return nil } @@ -2179,6 +2209,7 @@ func nodeFor(name, ipv4, ipv6, v4subnet, v6subnet string) *v1.Node { Name: name, Annotations: map[string]string{ "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", ipv4, ipv6), + "k8s.ovn.org/host-addresses": fmt.Sprintf("[\"%s\", \"%s\"]", ipv4, ipv6), "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\",\"%s\"]}", v4subnet, v6subnet), }, Labels: map[string]string{ diff --git a/go-controller/pkg/ovn/ovn.go b/go-controller/pkg/ovn/ovn.go index bbd1f47d87..d10ca81cc2 100644 --- a/go-controller/pkg/ovn/ovn.go +++ b/go-controller/pkg/ovn/ovn.go @@ -28,6 +28,7 @@ import ( "k8s.io/client-go/informers" clientset "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/scheme" + listers "k8s.io/client-go/listers/core/v1" "k8s.io/client-go/tools/record" ref "k8s.io/client-go/tools/reference" "k8s.io/klog/v2" @@ -469,14 +470,14 @@ func (oc *DefaultNetworkController) InitEgressServiceController() (*egresssvc.Co // If the EgressIP controller is enabled it will take care of creating the // "no reroute" policies - we can pass "noop" functions to the egress service controller. initClusterEgressPolicies := func(libovsdbclient.Client, addressset.AddressSetFactory, string) error { return nil } - createNodeNoReroutePolicies := func(libovsdbclient.Client, addressset.AddressSetFactory, *kapi.Node, string) error { return nil } - deleteNodeNoReroutePolicies := func(addressset.AddressSetFactory, string, net.IP, net.IP, string) error { return nil } + ensureNodeNoReroutePolicies := func(libovsdbclient.Client, addressset.AddressSetFactory, string, listers.NodeLister) error { + return nil + } deleteLegacyDefaultNoRerouteNodePolicies := func(libovsdbclient.Client, string) error { return nil } if !config.OVNKubernetesFeature.EnableEgressIP { initClusterEgressPolicies = InitClusterEgressPolicies - createNodeNoReroutePolicies = CreateDefaultNoRerouteNodePolicies - deleteNodeNoReroutePolicies = DeleteDefaultNoRerouteNodePolicies + ensureNodeNoReroutePolicies = ensureDefaultNoRerouteNodePolicies deleteLegacyDefaultNoRerouteNodePolicies = DeleteLegacyDefaultNoRerouteNodePolicies } @@ -500,7 +501,7 @@ func (oc *DefaultNetworkController) InitEgressServiceController() (*egresssvc.Co } return egresssvc.NewController(DefaultNetworkControllerName, oc.client, oc.nbClient, oc.addressSetFactory, - initClusterEgressPolicies, createNodeNoReroutePolicies, deleteNodeNoReroutePolicies, deleteLegacyDefaultNoRerouteNodePolicies, oc.kube.UpdateEgressServiceStatus, + initClusterEgressPolicies, ensureNodeNoReroutePolicies, deleteLegacyDefaultNoRerouteNodePolicies, oc.kube.UpdateEgressServiceStatus, isReachable, oc.stopChan, oc.watchFactory.EgressServiceInformer(), oc.svcFactory.Core().V1().Services(), oc.svcFactory.Discovery().V1().EndpointSlices(), diff --git a/go-controller/pkg/util/util.go b/go-controller/pkg/util/util.go index b67f647283..fa1f45b719 100644 --- a/go-controller/pkg/util/util.go +++ b/go-controller/pkg/util/util.go @@ -21,6 +21,7 @@ import ( "github.com/urfave/cli/v2" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" 
"k8s.io/klog/v2" utilnet "k8s.io/utils/net" @@ -111,6 +112,33 @@ func GetNodeInternalAddrs(node *v1.Node) (net.IP, net.IP) { return v4Addr, v6Addr } +// GetNodeAddresses returns all of the node's IPv4 and/or IPv6 annotated +// addresses as requested. Note that nodes not annotated will be ignored. +func GetNodeAddresses(ipv4, ipv6 bool, nodes ...*v1.Node) (ipsv4 []net.IP, ipsv6 []net.IP, err error) { + allips := sets.Set[string]{} + for _, node := range nodes { + ips, err := ParseNodeHostAddresses(node) + if IsAnnotationNotSetError(err) { + continue + } + if err != nil { + return nil, nil, err + } + allips = allips.Insert(ips.UnsortedList()...) + } + + for _, ip := range allips.UnsortedList() { + ip := utilnet.ParseIPSloppy(ip) + if ipv4 && utilnet.IsIPv4(ip) { + ipsv4 = append(ipsv4, ip) + } else if ipv6 && utilnet.IsIPv6(ip) { + ipsv6 = append(ipsv6, ip) + } + } + + return +} + // GetNodeChassisID returns the machine's OVN chassis ID func GetNodeChassisID() (string, error) { chassisID, stderr, err := RunOVSVsctl("--if-exists", "get", diff --git a/test/e2e/egress_services.go b/test/e2e/egress_services.go index e5bb482a7b..de3656883c 100644 --- a/test/e2e/egress_services.go +++ b/test/e2e/egress_services.go @@ -249,7 +249,24 @@ metadata: break } } - ginkgo.By("Creating host-networked pod, on non-egress node to act as \"another node\"") + ginkgo.By("By setting a secondary IP on non-egress node acting as \"another node\"") + var otherDstIP string + if protocol == v1.IPv6Protocol { + otherDstIP = "fc00:f853:ccd:e793:ffff::1" + } else { + otherDstIP = "172.18.1.1" + } + _, err = runCommand(containerRuntime, "exec", dstNode.Name, "ip", "addr", "add", otherDstIP, "dev", "breth0") + if err != nil { + framework.Failf("failed to add address to node %s: %v", dstNode.Name, err) + } + defer func() { + _, err = runCommand(containerRuntime, "exec", dstNode.Name, "ip", "addr", "delete", otherDstIP, "dev", "breth0") + if err != nil { + framework.Failf("failed to remove address from node %s: %v", dstNode.Name, err) + } + }() + ginkgo.By("Creating host-networked pod on non-egress node acting as \"another node\"") _, err = createPod(f, hostNetPod, dstNode.Name, f.Namespace.Name, []string{"/agnhost", "netexec", fmt.Sprintf("--http-port=%d", podHTTPPort)}, map[string]string{}, func(p *v1.Pod) { p.Spec.HostNetwork = true }) @@ -275,7 +292,10 @@ metadata: } gomega.Consistently(func() error { return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, expectedsrcIP, dstIP, podHTTPPort) - }, 1*time.Second, 200*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to reach other node with node's ip") + }, 1*time.Second, 200*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to reach other node with node's primary ip") + gomega.Consistently(func() error { + return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, expectedsrcIP, otherDstIP, podHTTPPort) + }, 1*time.Second, 200*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to reach other node with node's secondary ip") } }, ginkgotable.Entry("ipv4 pods", v1.IPv4Protocol), diff --git a/test/e2e/egressip.go b/test/e2e/egressip.go index 3d30a207ee..ea6c426622 100644 --- a/test/e2e/egressip.go +++ b/test/e2e/egressip.go @@ -590,29 +590,34 @@ spec: ) }) - // Validate the egress IP by creating a httpd container on the kind networking - // (effectively seen as "outside" the cluster) and curl it from a pod in the cluster - // which matches the egress IP stanza. 
Aim is to check if the SNATs towards nodeIP versus - // SNATs towards egressIPs are being correctly deleted and recreated. + // Validate the egress IP by creating a httpd container on the kind + // networking (effectively seen as "outside" the cluster) and curl it from a + // pod in the cluster which matches the egress IP stanza. Aim is to check + // that the SNATs to egressIPs are being correctly deleted and recreated + // but not used for intra-cluster traffic. /* This test does the following: 0. Add the "k8s.ovn.org/egress-assignable" label to egress1Node - 1. Creating host-networked pod, on non-egress node (egress2Node) to act as "another node" - 2. Create an EgressIP object with one egress IP defined - 3. Check that the status is of length one and that it is assigned to egress1Node - 4. Create one pod matching the EgressIP: running on egress1Node - 5. Check connectivity from pod to an external "node" and verify that the srcIP is the expected egressIP - 6. Check connectivity from pod to another node (egress2Node) and verify that the srcIP is the expected nodeIP - 7. Add the "k8s.ovn.org/egress-assignable" label to egress2Node - 8. Remove the "k8s.ovn.org/egress-assignable" label from egress1Node - 9. Check that the status is of length one and that it is assigned to egress2Node - 10. Check connectivity from pod to an external "node" and verify that the srcIP is the expected egressIP - 11. Check connectivity from pod to another node (egress2Node) and verify that the srcIP is the expected nodeIP - 12. Create second pod not matching the EgressIP: running on egress1Node - 13. Check connectivity from second pod to external node and verify that the srcIP is the expected nodeIP - 14. Add pod selector label to make second pod egressIP managed - 15. Check connectivity from second pod to external node and verify that the srcIP is the expected egressIP - 16. Check connectivity from second pod to another node (egress2Node) and verify that the srcIP is the expected nodeIP (this verifies SNAT's towards nodeIP are not deleted for pods unless pod is on its own egressNode) + 1. Setting a secondary IP on non-egress node acting as "another node" + 2. Creating host-networked pod on non-egress node (egress2Node) acting as "another node" + 3. Create an EgressIP object with one egress IP defined + 4. Check that the status is of length one and that it is assigned to egress1Node + 5. Create one pod matching the EgressIP: running on egress1Node + 6. Check connectivity from pod to an external "node" and verify that the srcIP is the expected egressIP + 7. Check connectivity from pod to another node (egress2Node) primary IP and verify that the srcIP is the expected nodeIP + 8. Check connectivity from pod to another node (egress2Node) secondary IP and verify that the srcIP is the expected nodeIP + 9. Add the "k8s.ovn.org/egress-assignable" label to egress2Node + 10. Remove the "k8s.ovn.org/egress-assignable" label from egress1Node + 11. Check that the status is of length one and that it is assigned to egress2Node + 12. Check connectivity from pod to an external "node" and verify that the srcIP is the expected egressIP + 13. Check connectivity from pod to another node (egress2Node) primary IP and verify that the srcIP is the expected nodeIP + 14. Check connectivity from pod to another node (egress2Node) secondary IP and verify that the srcIP is the expected nodeIP + 15. Create second pod not matching the EgressIP: running on egress1Node + 16. 
Check connectivity from second pod to external node and verify that the srcIP is the expected nodeIP + 17. Add pod selector label to make second pod egressIP managed + 18. Check connectivity from second pod to external node and verify that the srcIP is the expected egressIP + 19. Check connectivity from second pod to another node (egress2Node) primary IP and verify that the srcIP is the expected nodeIP (this verifies SNAT's towards nodeIP are not deleted for pods unless pod is on its own egressNode) + 20. Check connectivity from second pod to another node (egress2Node) secondary IP and verify that the srcIP is the expected nodeIP (this verifies SNAT's towards nodeIP are not deleted for pods unless pod is on its own egressNode) */ ginkgo.It("Should validate the egress IP SNAT functionality against host-networked pods", func() { @@ -623,8 +628,30 @@ spec: framework.Logf("Added egress-assignable label to node %s", egress1Node.name) framework.ExpectNodeHasLabel(f.ClientSet, egress1Node.name, "k8s.ovn.org/egress-assignable", "dummy") - ginkgo.By("1. Creating host-networked pod, on non-egress node to act as \"another node\"") - _, err := createPod(f, egress2Node.name+"-host-net-pod", egress2Node.name, f.Namespace.Name, []string{}, map[string]string{}, func(p *v1.Pod) { + ginkgo.By("1. By setting a secondary IP on non-egress node acting as \"another node\"") + var otherDstIP string + if utilnet.IsIPv6String(egress2Node.nodeIP) { + otherDstIP = "fc00:f853:ccd:e793:ffff::1" + } else { + otherDstIP = "172.18.1.1" + } + _, err := runCommand(containerRuntime, "exec", egress2Node.name, "ip", "addr", "add", otherDstIP, "dev", "breth0") + if err != nil { + framework.Failf("failed to add address to node %s: %v", egress2Node.name, err) + } + defer func() { + _, err = runCommand(containerRuntime, "exec", egress2Node.name, "ip", "addr", "delete", otherDstIP, "dev", "breth0") + if err != nil { + framework.Failf("failed to remove address from node %s: %v", egress2Node.name, err) + } + }() + otherHostNetPodIP := node{ + name: egress2Node.name + "-host-net-pod", + nodeIP: otherDstIP, + } + + ginkgo.By("2. Creating host-networked pod, on non-egress node acting as \"another node\"") + _, err = createPod(f, egress2Node.name+"-host-net-pod", egress2Node.name, f.Namespace.Name, []string{}, map[string]string{}, func(p *v1.Pod) { p.Spec.HostNetwork = true p.Spec.Containers[0].Image = "docker.io/httpd" }) @@ -641,7 +668,7 @@ spec: } updateNamespace(f, podNamespace) - ginkgo.By("2. Create an EgressIP object with one egress IP defined") + ginkgo.By("3. Create an EgressIP object with one egress IP defined") // Assign the egress IP without conflicting with any node IP, // the kind subnet is /16 or /64 so the following should be fine. egressNodeIP := net.ParseIP(egress1Node.nodeIP) @@ -674,13 +701,13 @@ spec: framework.Logf("Create the EgressIP configuration") framework.RunKubectlOrDie("default", "create", "-f", egressIPYaml) - ginkgo.By("3. Check that the status is of length one and that it is assigned to egress1Node") + ginkgo.By("4. Check that the status is of length one and that it is assigned to egress1Node") statuses := verifyEgressIPStatusLengthEquals(1, nil) if statuses[0].Node != egress1Node.name { - framework.Failf("Step 2. Check that the status is of length one and that it is assigned to egress1Node, failed") + framework.Failf("Step 4. Check that the status is of length one and that it is assigned to egress1Node, failed") } - ginkgo.By("4. 
Create one pod matching the EgressIP: running on egress1Node") + ginkgo.By("5. Create one pod matching the EgressIP: running on egress1Node") createGenericPodWithLabel(f, pod1Name, pod2Node.name, f.Namespace.Name, command, podEgressLabel) err = wait.PollImmediate(retryInterval, retryTimeout, func() (bool, error) { @@ -691,43 +718,51 @@ spec: } return true, nil }) - framework.ExpectNoError(err, "Step 4. Create one pod matching the EgressIP: running on egress1Node, failed, err: %v", err) + framework.ExpectNoError(err, "Step 5. Create one pod matching the EgressIP: running on egress1Node, failed, err: %v", err) framework.Logf("Created pod %s on node %s", pod1Name, pod2Node.name) - ginkgo.By("5. Check connectivity from pod to an external node and verify that the srcIP is the expected egressIP") + ginkgo.By("6. Check connectivity from pod to an external node and verify that the srcIP is the expected egressIP") err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, podNamespace.Name, true, []string{egressIP1.String()})) framework.ExpectNoError(err, "Step 5. Check connectivity from pod to an external node and verify that the srcIP is the expected egressIP, failed: %v", err) - ginkgo.By("6. Check connectivity from pod to another node and verify that the srcIP is the expected nodeIP") + ginkgo.By("7. Check connectivity from pod to another node primary IP and verify that the srcIP is the expected nodeIP") err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(hostNetPod, pod1Name, podNamespace.Name, true, []string{egressNodeIP.String()})) framework.ExpectNoError(err, "Step 6. Check connectivity from pod to another node and verify that the srcIP is the expected nodeIP, failed: %v", err) - ginkgo.By("7. Add the \"k8s.ovn.org/egress-assignable\" label to egress2Node") + ginkgo.By("8. Check connectivity from pod to another node secondary IP and verify that the srcIP is the expected nodeIP") + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(otherHostNetPodIP, pod1Name, podNamespace.Name, true, []string{egressNodeIP.String()})) + framework.ExpectNoError(err, "Step 6. Check connectivity from pod to another node and verify that the srcIP is the expected nodeIP, failed: %v", err) + + ginkgo.By("9. Add the \"k8s.ovn.org/egress-assignable\" label to egress2Node") framework.AddOrUpdateLabelOnNode(f.ClientSet, egress2Node.name, "k8s.ovn.org/egress-assignable", "dummy") framework.Logf("Added egress-assignable label to node %s", egress2Node.name) framework.ExpectNodeHasLabel(f.ClientSet, egress2Node.name, "k8s.ovn.org/egress-assignable", "dummy") - ginkgo.By("8. Remove the \"k8s.ovn.org/egress-assignable\" label from egress1Node") + ginkgo.By("10. Remove the \"k8s.ovn.org/egress-assignable\" label from egress1Node") framework.RemoveLabelOffNode(f.ClientSet, egress1Node.name, "k8s.ovn.org/egress-assignable") - ginkgo.By("9. Check that the status is of length one and that it is assigned to egress2Node") + ginkgo.By("11. Check that the status is of length one and that it is assigned to egress2Node") // There is sometimes a slight delay for the EIP fail over to happen, // so let's use the pollimmediate struct to check if eventually egress2Node becomes the egress node err = wait.PollImmediate(retryInterval, retryTimeout, func() (bool, error) { statuses := getEgressIPStatusItems() return (len(statuses) == 1) && (statuses[0].Node == egress2Node.name), nil }) - framework.ExpectNoError(err, "Step 9. 
Check that the status is of length one and that it is assigned to egress2Node, failed: %v", err) + framework.ExpectNoError(err, "Step 11. Check that the status is of length one and that it is assigned to egress2Node, failed: %v", err) - ginkgo.By("10. Check connectivity from pod to an external \"node\" and verify that the srcIP is the expected egressIP") + ginkgo.By("12. Check connectivity from pod to an external \"node\" and verify that the srcIP is the expected egressIP") err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, podNamespace.Name, true, []string{egressIP1.String()})) - framework.ExpectNoError(err, "Step 10. Check connectivity from pod to an external \"node\" and verify that the srcIP is the expected egressIP, failed, err: %v", err) + framework.ExpectNoError(err, "Step 12. Check connectivity from pod to an external \"node\" and verify that the srcIP is the expected egressIP, failed, err: %v", err) - ginkgo.By("11. Check connectivity from pod to another node and verify that the srcIP is the expected nodeIP") + ginkgo.By("13. Check connectivity from pod to another node primary IP and verify that the srcIP is the expected nodeIP") err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(hostNetPod, pod1Name, podNamespace.Name, true, []string{egressNodeIP.String()})) - framework.ExpectNoError(err, "Step 11. Check connectivity from pod to another node and verify that the srcIP is the expected nodeIP, failed: %v", err) + framework.ExpectNoError(err, "Step 13. Check connectivity from pod to another node and verify that the srcIP is the expected nodeIP, failed: %v", err) - ginkgo.By("12. Create second pod not matching the EgressIP: running on egress1Node") + ginkgo.By("14. Check connectivity from pod to another node secondary IP and verify that the srcIP is the expected nodeIP") + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(otherHostNetPodIP, pod1Name, podNamespace.Name, true, []string{egressNodeIP.String()})) + framework.ExpectNoError(err, "Step 14. Check connectivity from pod to another node and verify that the srcIP is the expected nodeIP, failed: %v", err) + + ginkgo.By("15. Create second pod not matching the EgressIP: running on egress1Node") createGenericPodWithLabel(f, pod2Name, pod2Node.name, f.Namespace.Name, command, map[string]string{}) err = wait.PollImmediate(retryInterval, retryTimeout, func() (bool, error) { kubectlOut := getPodAddress(pod2Name, f.Namespace.Name) @@ -737,25 +772,29 @@ spec: } return true, nil }) - framework.ExpectNoError(err, "Step 12. Create second pod not matching the EgressIP: running on egress1Node, failed, err: %v", err) + framework.ExpectNoError(err, "Step 15. Create second pod not matching the EgressIP: running on egress1Node, failed, err: %v", err) framework.Logf("Created pod %s on node %s", pod2Name, pod2Node.name) - ginkgo.By("13. Check connectivity from second pod to external node and verify that the srcIP is the expected nodeIP") + ginkgo.By("16. Check connectivity from second pod to external node and verify that the srcIP is the expected nodeIP") err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod2Name, podNamespace.Name, true, []string{egressNodeIP.String()})) - framework.ExpectNoError(err, "Step 13. Check connectivity from second pod to external node and verify that the srcIP is the expected nodeIP, failed: %v", err) + framework.ExpectNoError(err, "Step 16. 
Check connectivity from second pod to external node and verify that the srcIP is the expected nodeIP, failed: %v", err) - ginkgo.By("14. Add pod selector label to make second pod egressIP managed") + ginkgo.By("17. Add pod selector label to make second pod egressIP managed") pod2 := getPod(f, pod2Name) pod2.Labels = podEgressLabel updatePod(f, pod2) - ginkgo.By("15. Check connectivity from second pod to external node and verify that the srcIP is the expected egressIP") + ginkgo.By("18. Check connectivity from second pod to external node and verify that the srcIP is the expected egressIP") err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod2Name, podNamespace.Name, true, []string{egressIP1.String()})) - framework.ExpectNoError(err, "Step 15. Check connectivity from second pod to external node and verify that the srcIP is the expected egressIP, failed: %v", err) + framework.ExpectNoError(err, "Step 18. Check connectivity from second pod to external node and verify that the srcIP is the expected egressIP, failed: %v", err) - ginkgo.By("16. Check connectivity from second pod to another node and verify that the srcIP is the expected nodeIP (this verifies SNAT's towards nodeIP are not deleted unless node is egressNode)") + ginkgo.By("19. Check connectivity from second pod to another node primary IP and verify that the srcIP is the expected nodeIP (this verifies SNAT's towards nodeIP are not deleted unless node is egressNode)") err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(hostNetPod, pod2Name, podNamespace.Name, true, []string{egressNodeIP.String()})) framework.ExpectNoError(err, "Step 16. Check connectivity from second pod to another node and verify that the srcIP is the expected nodeIP (this verifies SNAT's towards nodeIP are not deleted unless node is egressNode), failed: %v", err) + + ginkgo.By("20. Check connectivity from second pod to another node secondary IP and verify that the srcIP is the expected nodeIP (this verifies SNAT's towards nodeIP are not deleted unless node is egressNode)") + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(otherHostNetPodIP, pod2Name, podNamespace.Name, true, []string{egressNodeIP.String()})) + framework.ExpectNoError(err, "Step 20. Check connectivity from second pod to another node and verify that the srcIP is the expected nodeIP (this verifies SNAT's towards nodeIP are not deleted unless node is egressNode), failed: %v", err) }) // Validate the egress IP with stateful sets or pods recreated with same name From eccdf0498f88a36530f06a10ab1f8894099f362d Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 17 May 2023 17:14:01 +0300 Subject: [PATCH 48/90] Drop MgmtPortRepresentor config variable After introducing representor discovery for DPU mode MgmtPortRepresentor variable is not populated which cause ovnkube to mistakenly unconfigure management port netdevice or representor in the Full mode because it uses incorrect management interface name. Drop this variable as obsolete and use other existing config variables to determine when management interface name should have a post-fix. Fix affected unit-tests. 
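
For context, a minimal self-contained sketch (not part of the patch) of how the "_0" suffix is now derived once MgmtPortRepresentor is gone; mgmtIfaceName is a hypothetical helper, and the real change checks config.OvnKubeNode.Mode and config.OvnKubeNode.MgmtPortNetdev at the two call sites shown in the hunks below:

package main

import "fmt"

// Hypothetical helper, for illustration only: the representor path adds the
// "_0" suffix when the node runs in full mode (types.NodeModeFull), while the
// readiness-check path adds it whenever a management-port netdev is configured.
func mgmtIfaceName(base, nodeMode, mgmtPortNetdev string, representor bool) string {
	name := base // e.g. "ovn-k8s-mp0"
	if representor && nodeMode == "full" {
		name += "_0"
	}
	if !representor && mgmtPortNetdev != "" {
		name += "_0"
	}
	return name
}

func main() {
	fmt.Println(mgmtIfaceName("ovn-k8s-mp0", "full", "enp3s0f0v0", true)) // ovn-k8s-mp0_0
	fmt.Println(mgmtIfaceName("ovn-k8s-mp0", "dpu", "", true))            // ovn-k8s-mp0
}
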
Fixes: #3598 Signed-off-by: Dmytro Linkin --- go-controller/pkg/config/config.go | 3 +-- go-controller/pkg/config/config_test.go | 1 - go-controller/pkg/node/management-port-dpu.go | 2 +- go-controller/pkg/node/management-port.go | 2 +- .../pkg/node/management-port_dpu_test.go | 20 +++++++++---------- 5 files changed, 13 insertions(+), 15 deletions(-) diff --git a/go-controller/pkg/config/config.go b/go-controller/pkg/config/config.go index 95ca52efa0..0cb9e8b5f2 100644 --- a/go-controller/pkg/config/config.go +++ b/go-controller/pkg/config/config.go @@ -453,8 +453,7 @@ type OvnKubeNodeConfig struct { DPResourceDeviceIdsMap map[string][]string MgmtPortNetdev string `gcfg:"mgmt-port-netdev"` MgmtPortDPResourceName string `gcfg:"mgmt-port-dp-resource-name"` - MgmtPortRepresentor string - DisableOVNIfaceIdVer bool `gcfg:"disable-ovn-iface-id-ver"` + DisableOVNIfaceIdVer bool `gcfg:"disable-ovn-iface-id-ver"` } // ClusterManagerConfig holds configuration for ovnkube-cluster-manager diff --git a/go-controller/pkg/config/config_test.go b/go-controller/pkg/config/config_test.go index 46db2c74ec..80e7fcad21 100644 --- a/go-controller/pkg/config/config_test.go +++ b/go-controller/pkg/config/config_test.go @@ -307,7 +307,6 @@ var _ = Describe("Config Operations", func() { gomega.Expect(OvnKubeNode.Mode).To(gomega.Equal(types.NodeModeFull)) gomega.Expect(OvnKubeNode.MgmtPortNetdev).To(gomega.Equal("")) gomega.Expect(OvnKubeNode.MgmtPortDPResourceName).To(gomega.Equal("")) - gomega.Expect(OvnKubeNode.MgmtPortRepresentor).To(gomega.Equal("")) gomega.Expect(Gateway.RouterSubnet).To(gomega.Equal("")) gomega.Expect(Gateway.SingleNode).To(gomega.BeFalse()) gomega.Expect(Gateway.DisableForwarding).To(gomega.BeFalse()) diff --git a/go-controller/pkg/node/management-port-dpu.go b/go-controller/pkg/node/management-port-dpu.go index 2ee941f944..1773c10e84 100644 --- a/go-controller/pkg/node/management-port-dpu.go +++ b/go-controller/pkg/node/management-port-dpu.go @@ -31,7 +31,7 @@ func newManagementPortRepresentor(nodeName string, hostSubnets []*net.IPNet, rep func (mp *managementPortRepresentor) Create(_ *routeManager, nodeAnnotator kube.Annotator, waiter *startupWaiter) (*managementPortConfig, error) { k8sMgmtIntfName := types.K8sMgmtIntfName - if config.OvnKubeNode.MgmtPortRepresentor != "" { + if config.OvnKubeNode.Mode == types.NodeModeFull { k8sMgmtIntfName += "_0" } diff --git a/go-controller/pkg/node/management-port.go b/go-controller/pkg/node/management-port.go index 2d17cf57a5..d9b7a334b0 100644 --- a/go-controller/pkg/node/management-port.go +++ b/go-controller/pkg/node/management-port.go @@ -137,7 +137,7 @@ func (mp *managementPort) HasIpAddr() bool { func managementPortReady() (bool, error) { k8sMgmtIntfName := types.K8sMgmtIntfName - if config.OvnKubeNode.MgmtPortRepresentor != "" { + if config.OvnKubeNode.MgmtPortNetdev != "" { k8sMgmtIntfName += "_0" } // Get the OVS interface name for the Management Port diff --git a/go-controller/pkg/node/management-port_dpu_test.go b/go-controller/pkg/node/management-port_dpu_test.go index cebfb1ef72..e1a1db0881 100644 --- a/go-controller/pkg/node/management-port_dpu_test.go +++ b/go-controller/pkg/node/management-port_dpu_test.go @@ -20,7 +20,7 @@ import ( func genOVSAddMgmtPortCmd(nodeName, repName string) string { return fmt.Sprintf("ovs-vsctl --timeout=15 -- --may-exist add-port br-int %s -- set interface %s external-ids:iface-id=%s"+ " external-ids:ovn-orig-mgmt-port-rep-name=%s", - types.K8sMgmtIntfName, types.K8sMgmtIntfName, types.K8sPrefix+nodeName, 
repName) + types.K8sMgmtIntfName+"_0", types.K8sMgmtIntfName+"_0", types.K8sPrefix+nodeName, repName) } func mockOVSListInterfaceMgmtPortNotExistCmd(execMock *ovntest.FakeExec, mgmtPortName string) { @@ -88,12 +88,12 @@ var _ = Describe("Mananagement port DPU tests", func() { netlinkOpsMock.On("LinkByName", "enp3s0f0v0").Return( linkMock, nil) - netlinkOpsMock.On("LinkByName", types.K8sMgmtIntfName).Return( + netlinkOpsMock.On("LinkByName", types.K8sMgmtIntfName+"_0").Return( nil, fmt.Errorf("link not found")) netlinkOpsMock.On("IsLinkNotFoundError", mock.Anything).Return(true) netlinkOpsMock.On("LinkSetDown", linkMock).Return(nil) - netlinkOpsMock.On("LinkSetName", linkMock, types.K8sMgmtIntfName).Return(fmt.Errorf("failed to set name")) - mockOVSListInterfaceMgmtPortNotExistCmd(execMock, types.K8sMgmtIntfName) + netlinkOpsMock.On("LinkSetName", linkMock, types.K8sMgmtIntfName+"_0").Return(fmt.Errorf("failed to set name")) + mockOVSListInterfaceMgmtPortNotExistCmd(execMock, types.K8sMgmtIntfName+"_0") _, err := mgmtPortDpu.Create(nil, nodeAnnotatorMock, waiter) Expect(err).To(HaveOccurred()) @@ -115,14 +115,14 @@ var _ = Describe("Mananagement port DPU tests", func() { netlinkOpsMock.On("LinkByName", "enp3s0f0v0").Return( linkMock, nil) - netlinkOpsMock.On("LinkByName", types.K8sMgmtIntfName).Return( + netlinkOpsMock.On("LinkByName", types.K8sMgmtIntfName+"_0").Return( nil, fmt.Errorf("link not found")) netlinkOpsMock.On("IsLinkNotFoundError", mock.Anything).Return(true) netlinkOpsMock.On("LinkSetDown", linkMock).Return(nil) - netlinkOpsMock.On("LinkSetName", linkMock, types.K8sMgmtIntfName).Return(nil) + netlinkOpsMock.On("LinkSetName", linkMock, types.K8sMgmtIntfName+"_0").Return(nil) netlinkOpsMock.On("LinkSetMTU", linkMock, config.Default.MTU).Return(nil) netlinkOpsMock.On("LinkSetUp", linkMock).Return(nil) - mockOVSListInterfaceMgmtPortNotExistCmd(execMock, types.K8sMgmtIntfName) + mockOVSListInterfaceMgmtPortNotExistCmd(execMock, types.K8sMgmtIntfName+"_0") execMock.AddFakeCmd(&ovntest.ExpectedCmd{ Cmd: genOVSAddMgmtPortCmd(mgmtPortDpu.nodeName, mgmtPortDpu.repName), }) @@ -130,7 +130,7 @@ var _ = Describe("Mananagement port DPU tests", func() { mpcfg, err := mgmtPortDpu.Create(nil, nodeAnnotatorMock, waiter) Expect(execMock.CalledMatchesExpected()).To(BeTrue(), execMock.ErrorDesc) Expect(err).ToNot(HaveOccurred()) - Expect(mpcfg.ifName).To(Equal(types.K8sMgmtIntfName)) + Expect(mpcfg.ifName).To(Equal(types.K8sMgmtIntfName + "_0")) Expect(mpcfg.link).To(Equal(linkMock)) }) @@ -146,7 +146,7 @@ var _ = Describe("Mananagement port DPU tests", func() { } nodeAnnotatorMock.On("Set", mock.Anything, expectedMgmtPortMac.String()).Return(nil) linkMock := &mocks.Link{} - linkMock.On("Attrs").Return(&netlink.LinkAttrs{Name: "ovn-k8s-mp0", MTU: config.Default.MTU}) + linkMock.On("Attrs").Return(&netlink.LinkAttrs{Name: "ovn-k8s-mp0_0", MTU: config.Default.MTU}) netlinkOpsMock.On("LinkByName", "enp3s0f0v0").Return( linkMock, nil) @@ -158,7 +158,7 @@ var _ = Describe("Mananagement port DPU tests", func() { mpcfg, err := mgmtPortDpu.Create(nil, nodeAnnotatorMock, waiter) Expect(execMock.CalledMatchesExpected()).To(BeTrue(), execMock.ErrorDesc) Expect(err).ToNot(HaveOccurred()) - Expect(mpcfg.ifName).To(Equal(types.K8sMgmtIntfName)) + Expect(mpcfg.ifName).To(Equal(types.K8sMgmtIntfName + "_0")) Expect(mpcfg.link).To(Equal(linkMock)) }) }) From 225b19cf4cab3488f7702cb0639475e609489486 Mon Sep 17 00:00:00 2001 From: Jacob Tanenbaum Date: Mon, 15 May 2023 19:49:28 -0400 Subject: [PATCH 49/90] initial 
pods are not wired for hybrid overlay When toggling hybrid overlay the linux nodes can miss wiring the pods on start-up if allocating the hybrid overlay DRIP and DRMAC takes too long. Fix this issue by passing a podLister to the node controller and running the AddPods() command on all local pods once on startup after DRIP and DRMAC are set. Signed-off-by: Jacob Tanenbaum --- .../hybrid-overlay/pkg/controller/node.go | 3 +- .../pkg/controller/node_linux.go | 38 ++++++-- .../pkg/controller/node_linux_test.go | 92 ++++++++++++++++++- .../pkg/controller/node_windows.go | 1 + .../hybrid-overlay/pkg/types/types.go | 9 ++ 5 files changed, 127 insertions(+), 16 deletions(-) diff --git a/go-controller/hybrid-overlay/pkg/controller/node.go b/go-controller/hybrid-overlay/pkg/controller/node.go index cdf65da4e4..3d551ba257 100644 --- a/go-controller/hybrid-overlay/pkg/controller/node.go +++ b/go-controller/hybrid-overlay/pkg/controller/node.go @@ -92,8 +92,9 @@ func NewNode( ) (*Node, error) { nodeLister := listers.NewNodeLister(nodeInformer.GetIndexer()) + podLister := listers.NewPodLister(podInformer.GetIndexer()) - controller, err := newNodeController(kube, nodeName, nodeLister) + controller, err := newNodeController(kube, nodeName, nodeLister, podLister) if err != nil { return nil, err } diff --git a/go-controller/hybrid-overlay/pkg/controller/node_linux.go b/go-controller/hybrid-overlay/pkg/controller/node_linux.go index b9da7e7d60..b51a578829 100644 --- a/go-controller/hybrid-overlay/pkg/controller/node_linux.go +++ b/go-controller/hybrid-overlay/pkg/controller/node_linux.go @@ -41,13 +41,12 @@ type flowCacheEntry struct { // NodeController is the node hybrid overlay controller type NodeController struct { - nodeName string - // an atomic uint32 for testing purposes 0 = uninitialized and 1 = initialized - initialized uint32 - drMAC net.HardwareAddr - drIP net.IP - gwLRPIP net.IP - vxlanPort uint16 + nodeName string + initState hotypes.HybridInitState + drMAC net.HardwareAddr + drIP net.IP + gwLRPIP net.IP + vxlanPort uint16 // contains a map of pods to corresponding tunnels flowCache map[string]*flowCacheEntry flowMutex sync.Mutex @@ -55,6 +54,7 @@ type NodeController struct { flowChan chan struct{} nodeLister listers.NodeLister + podLister listers.PodLister } // newNodeController returns a node handler that listens for node events @@ -66,16 +66,20 @@ func newNodeController( _ kube.Interface, nodeName string, nodeLister listers.NodeLister, + podLister listers.PodLister, ) (nodeController, error) { node := &NodeController{ nodeName: nodeName, + initState: new(uint32), vxlanPort: uint16(config.HybridOverlay.VXLANPort), flowCache: make(map[string]*flowCacheEntry), flowMutex: sync.Mutex{}, flowChan: make(chan struct{}, 1), nodeLister: nodeLister, + podLister: podLister, } + atomic.StoreUint32(node.initState, hotypes.InitialStartup) return node, nil } @@ -108,7 +112,7 @@ func (n *NodeController) AddPod(pod *kapi.Pod) error { // if the IP/MAC or Annotations have changed ignoreLearn := true - if atomic.LoadUint32(&n.initialized) == 0 { + if atomic.LoadUint32(n.initState) == hotypes.InitialStartup { node, err := n.nodeLister.Get(n.nodeName) if err != nil { return fmt.Errorf("hybrid overlay not initialized on %s, and failed to get node data: %v", @@ -269,6 +273,20 @@ func (n *NodeController) AddNode(node *kapi.Node) error { klog.Infof("Add hybridOverlay Node %s", node.Name) err = n.hybridOverlayNodeUpdate(node) } + if atomic.LoadUint32(n.initState) == hotypes.DistributedRouterInitialized { + pods, err := 
n.podLister.List(labels.Everything()) + if err != nil { + return fmt.Errorf("cannot fully initialize node %s for hybrid overlay, cannot list pods: %v", n.nodeName, err) + } + + for _, pod := range pods { + err := n.AddPod(pod) + if err != nil { + klog.Errorf("Cannot wire pod %s for hybrid overlay, %v", pod.Name, err) + } + } + atomic.StoreUint32(n.initState, hotypes.PodsInitialized) + } return err } @@ -476,7 +494,7 @@ func (n *NodeController) handleHybridOverlayMACIPChange(node *kapi.Node) error { // EnsureHybridOverlayBridge sets up the hybrid overlay bridge func (n *NodeController) EnsureHybridOverlayBridge(node *kapi.Node) error { - if atomic.LoadUint32(&n.initialized) == 1 { + if atomic.LoadUint32(n.initState) >= hotypes.DistributedRouterInitialized { if node.Annotations[hotypes.HybridOverlayDRIP] != n.drIP.String() || node.Annotations[hotypes.HybridOverlayDRMAC] != n.drMAC.String() { if err := n.handleHybridOverlayMACIPChange(node); err != nil { @@ -647,7 +665,7 @@ func (n *NodeController) EnsureHybridOverlayBridge(node *kapi.Node) error { n.updateFlowCacheEntry("0x0", flows, false) n.requestFlowSync() - atomic.StoreUint32(&n.initialized, 1) + atomic.StoreUint32(n.initState, hotypes.DistributedRouterInitialized) klog.Infof("Hybrid overlay setup complete for node %s", node.Name) return nil } diff --git a/go-controller/hybrid-overlay/pkg/controller/node_linux_test.go b/go-controller/hybrid-overlay/pkg/controller/node_linux_test.go index bc005fe523..fdaec61cc7 100644 --- a/go-controller/hybrid-overlay/pkg/controller/node_linux_test.go +++ b/go-controller/hybrid-overlay/pkg/controller/node_linux_test.go @@ -7,6 +7,7 @@ import ( "strings" "sync" "sync/atomic" + "time" "github.com/urfave/cli/v2" v1 "k8s.io/api/core/v1" @@ -426,7 +427,7 @@ var _ = Describe("Hybrid Overlay Node Linux Operations", func() { linuxNode, okay := n.controller.(*NodeController) Expect(okay).To(BeTrue()) Eventually(func() bool { - return atomic.LoadUint32(&linuxNode.initialized) == 1 + return atomic.LoadUint32(linuxNode.initState) == hotypes.PodsInitialized }, 2).Should(BeTrue()) Eventually(fexec.CalledMatchesExpected, 2).Should(BeTrue(), fexec.ErrorDesc) @@ -484,7 +485,7 @@ var _ = Describe("Hybrid Overlay Node Linux Operations", func() { linuxNode, okay := n.controller.(*NodeController) Expect(okay).To(BeTrue()) Eventually(func() bool { - return atomic.LoadUint32(&linuxNode.initialized) == 1 + return atomic.LoadUint32(linuxNode.initState) == hotypes.PodsInitialized }, 2).Should(BeTrue()) Eventually(fexec.CalledMatchesExpected, 2).Should(BeTrue(), fexec.ErrorDesc) @@ -517,6 +518,87 @@ var _ = Describe("Hybrid Overlay Node Linux Operations", func() { } appRun(app) }) + ovntest.OnSupportedPlatformsIt("on startup will add a local linux pod that times out on the initial addPod event", func() { + app.Action = func(ctx *cli.Context) error { + const ( + pod1IP string = "1.2.3.5" + pod1CIDR string = pod1IP + "/24" + pod1MAC string = "aa:bb:cc:dd:ee:ff" + ) + + annotations := createNodeAnnotationsForSubnet(thisNodeSubnet) + annotations[hotypes.HybridOverlayDRMAC] = thisNodeDRMAC + annotations["k8s.ovn.org/node-gateway-router-lrp-ifaddr"] = "{\"ipv4\":\"100.64.0.3/16\"}" + annotations[hotypes.HybridOverlayDRIP] = thisNodeDRIP + node := createNode(thisNode, "linux", thisNodeIP, annotations) + testPod := createPod("test", "pod1", thisNode, pod1CIDR, pod1MAC) + fakeClient := fake.NewSimpleClientset( + //&v1.NodeList{ + // Items: []v1.Node{*node}, + //}, + &v1.PodList{ + Items: []v1.Pod{*testPod}, + }, + ) + + // Node setup from 
initial node sync + addNodeSetupCmds(fexec, thisNode) + _, err := config.InitConfig(ctx, fexec, nil) + Expect(err).NotTo(HaveOccurred()) + + f := informers.NewSharedInformerFactory(fakeClient, informer.DefaultResyncInterval) + + n, err := NewNode( + &kube.Kube{KClient: fakeClient}, + thisNode, + f.Core().V1().Nodes().Informer(), + f.Core().V1().Pods().Informer(), + informer.NewTestEventHandler, + ) + Expect(err).NotTo(HaveOccurred()) + + addEnsureHybridOverlayBridgeMocks(nlMock, thisNodeDRIP, "") + // initial flowSync + addSyncFlows(fexec) + // flowsync after EnsureHybridOverlayBridge() + addSyncFlows(fexec) + addSyncFlows(fexec) + + f.Start(stopChan) + wg.Add(1) + go func() { + defer wg.Done() + n.Run(stopChan) + }() + + linuxNode, okay := n.controller.(*NodeController) + Expect(okay).To(BeTrue()) + time.Sleep(2 * time.Second) + _, err = fakeClient.CoreV1().Nodes().Create(context.TODO(), node, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + Eventually(func() bool { + return atomic.LoadUint32(linuxNode.initState) == hotypes.PodsInitialized + }, 2).Should(BeTrue()) + + initialFlowCache := map[string]*flowCacheEntry{ + "0x0": generateInitialFlowCacheEntry(mgmtIfAddr.IP.String(), thisNodeDRIP, thisNodeDRMAC), + } + + initialFlowCache[podIPToCookie(net.ParseIP(pod1IP))] = &flowCacheEntry{ + flows: []string{"table=10,cookie=0x" + podIPToCookie(net.ParseIP(pod1IP)) + ",priority=100,ip,nw_dst=" + pod1IP + ",actions=set_field:" + thisNodeDRMAC + "->eth_src,set_field:" + pod1MAC + "->eth_dst,output:ext"}, + ignoreLearn: true, + } + Eventually(func() error { + linuxNode.flowMutex.Lock() + defer linuxNode.flowMutex.Unlock() + return compareFlowCache(linuxNode.flowCache, initialFlowCache) + }, 2).Should(BeNil()) + Eventually(fexec.CalledMatchesExpected, 2).Should(BeTrue(), fexec.ErrorDesc) + return nil + } + appRun(app) + }) ovntest.OnSupportedPlatformsIt("sets up tunnels for Windows nodes", func() { app.Action = func(ctx *cli.Context) error { @@ -570,7 +652,7 @@ var _ = Describe("Hybrid Overlay Node Linux Operations", func() { linuxNode, okay := n.controller.(*NodeController) Expect(okay).To(BeTrue()) Eventually(func() bool { - return atomic.LoadUint32(&linuxNode.initialized) == 1 + return atomic.LoadUint32(linuxNode.initState) == hotypes.PodsInitialized }, 2).Should(BeTrue()) Eventually(fexec.CalledMatchesExpected, 2).Should(BeTrue(), fexec.ErrorDesc) @@ -669,7 +751,7 @@ var _ = Describe("Hybrid Overlay Node Linux Operations", func() { linuxNode, okay := n.controller.(*NodeController) Expect(okay).To(BeTrue()) Eventually(func() bool { - return atomic.LoadUint32(&linuxNode.initialized) == 1 + return atomic.LoadUint32(linuxNode.initState) == hotypes.PodsInitialized }, 2).Should(BeTrue()) Eventually(fexec.CalledMatchesExpected, 2).Should(BeTrue(), fexec.ErrorDesc) @@ -804,7 +886,7 @@ var _ = Describe("Hybrid Overlay Node Linux Operations", func() { linuxNode, okay := n.controller.(*NodeController) Expect(okay).To(BeTrue()) Eventually(func() bool { - return atomic.LoadUint32(&linuxNode.initialized) == 1 + return atomic.LoadUint32(linuxNode.initState) == hotypes.PodsInitialized }, 2).Should(BeTrue()) Eventually(fexec.CalledMatchesExpected, 2).Should(BeTrue(), fexec.ErrorDesc) diff --git a/go-controller/hybrid-overlay/pkg/controller/node_windows.go b/go-controller/hybrid-overlay/pkg/controller/node_windows.go index e1c5571c23..d073b2b6fa 100644 --- a/go-controller/hybrid-overlay/pkg/controller/node_windows.go +++ b/go-controller/hybrid-overlay/pkg/controller/node_windows.go @@ -39,6 +39,7 
@@ type NodeController struct { func newNodeController(kube kube.Interface, nodeName string, nodeLister listers.NodeLister, + podLister listers.PodLister, ) (nodeController, error) { supportedFeatures := hcn.GetSupportedFeatures() if !supportedFeatures.HostRoute { diff --git a/go-controller/hybrid-overlay/pkg/types/types.go b/go-controller/hybrid-overlay/pkg/types/types.go index 9793bbf48f..449374ab98 100644 --- a/go-controller/hybrid-overlay/pkg/types/types.go +++ b/go-controller/hybrid-overlay/pkg/types/types.go @@ -4,6 +4,15 @@ import ( kapi "k8s.io/api/core/v1" ) +type HybridInitState *uint32 + +// these constants represent the initialization states of a linux node +const ( + InitialStartup = iota + DistributedRouterInitialized + PodsInitialized +) + const ( // HybridOverlayAnnotationBase holds the hybrid overlay annotation base HybridOverlayAnnotationBase = "k8s.ovn.org/hybrid-overlay-" From 66be8ac4f55b45e0864feac06a6e2962156d9f0f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 17 May 2023 10:40:27 -0500 Subject: [PATCH 50/90] ovn: assume ACL logging is always supported MeterBand and Meters have been supported for ACLs for a long time; any ovnkube user should be running an OVN that supports them. Signed-off-by: Dan Williams --- .../network_controller_manager.go | 19 ++++++------------- .../pkg/ovn/base_network_controller.go | 5 +---- .../ovn/base_network_controller_namespace.go | 4 ++-- go-controller/pkg/ovn/ovn_test.go | 2 -- 4 files changed, 9 insertions(+), 21 deletions(-) diff --git a/go-controller/pkg/network-controller-manager/network_controller_manager.go b/go-controller/pkg/network-controller-manager/network_controller_manager.go index d04ab7b926..e5a7d196cb 100644 --- a/go-controller/pkg/network-controller-manager/network_controller_manager.go +++ b/go-controller/pkg/network-controller-manager/network_controller_manager.go @@ -44,8 +44,6 @@ type networkControllerManager struct { multicastSupport bool // Supports OVN Template Load Balancers? svcTemplateSupport bool - // Is ACL logging enabled while configuring meters? - aclLoggingEnabled bool stopChan chan struct{} wg *sync.WaitGroup @@ -303,19 +301,10 @@ func (cm *networkControllerManager) createACLLoggingMeter() error { return nil } -func (cm *networkControllerManager) enableACLLoggingSupport() { - cm.aclLoggingEnabled = true - if err := cm.createACLLoggingMeter(); err != nil { - klog.Warningf("ACL logging support enabled, however acl-logging meter could not be created: %v. 
"+ - "Disabling ACL logging support", err) - cm.aclLoggingEnabled = false - } -} - // newCommonNetworkControllerInfo creates and returns the common networkController info func (cm *networkControllerManager) newCommonNetworkControllerInfo() (*ovn.CommonNetworkControllerInfo, error) { return ovn.NewCommonNetworkControllerInfo(cm.client, cm.kube, cm.watchFactory, cm.recorder, cm.nbClient, - cm.sbClient, cm.podRecorder, cm.SCTPSupport, cm.multicastSupport, cm.svcTemplateSupport, cm.aclLoggingEnabled) + cm.sbClient, cm.podRecorder, cm.SCTPSupport, cm.multicastSupport, cm.svcTemplateSupport) } // initDefaultNetworkController creates the controller for default network @@ -363,7 +352,11 @@ func (cm *networkControllerManager) Start(ctx context.Context) error { cm.configureMulticastSupport() cm.configureSvcTemplateSupport() - cm.enableACLLoggingSupport() + + err = cm.createACLLoggingMeter() + if err != nil { + return nil + } err = cm.enableOVNLogicalDataPathGroups() if err != nil { diff --git a/go-controller/pkg/ovn/base_network_controller.go b/go-controller/pkg/ovn/base_network_controller.go index afe40a776d..9d4330b883 100644 --- a/go-controller/pkg/ovn/base_network_controller.go +++ b/go-controller/pkg/ovn/base_network_controller.go @@ -61,8 +61,6 @@ type CommonNetworkControllerInfo struct { // Supports OVN Template Load Balancers? svcTemplateSupport bool - // Is ACL logging enabled while configuring meters? - aclLoggingEnabled bool // Northbound database zone name to which this Controller is connected to - aka local zone zone string @@ -153,7 +151,7 @@ type BaseSecondaryNetworkController struct { // NewCommonNetworkControllerInfo creates CommonNetworkControllerInfo shared by controllers func NewCommonNetworkControllerInfo(client clientset.Interface, kube *kube.KubeOVN, wf *factory.WatchFactory, recorder record.EventRecorder, nbClient libovsdbclient.Client, sbClient libovsdbclient.Client, - podRecorder *metrics.PodRecorder, SCTPSupport, multicastSupport, svcTemplateSupport, aclLoggingEnabled bool) (*CommonNetworkControllerInfo, error) { + podRecorder *metrics.PodRecorder, SCTPSupport, multicastSupport, svcTemplateSupport bool) (*CommonNetworkControllerInfo, error) { zone, err := util.GetNBZone(nbClient) if err != nil { return nil, fmt.Errorf("error getting NB zone name : err - %w", err) @@ -169,7 +167,6 @@ func NewCommonNetworkControllerInfo(client clientset.Interface, kube *kube.KubeO SCTPSupport: SCTPSupport, multicastSupport: multicastSupport, svcTemplateSupport: svcTemplateSupport, - aclLoggingEnabled: aclLoggingEnabled, zone: zone, }, nil } diff --git a/go-controller/pkg/ovn/base_network_controller_namespace.go b/go-controller/pkg/ovn/base_network_controller_namespace.go index fe817486c4..b20f1cfaf8 100644 --- a/go-controller/pkg/ovn/base_network_controller_namespace.go +++ b/go-controller/pkg/ovn/base_network_controller_namespace.go @@ -105,8 +105,8 @@ func (bnc *BaseNetworkController) aclLoggingUpdateNsInfo(annotation string, nsIn var aclLevels ACLLoggingLevels var errors []error - // If logging is disabled or if the annotation is "" or "{}", use empty strings. Otherwise, parse the annotation. - if bnc.aclLoggingEnabled && annotation != "" && annotation != "{}" { + // If the annotation is "" or "{}", use empty strings. Otherwise, parse the annotation. + if annotation != "" && annotation != "{}" { err := json.Unmarshal([]byte(annotation), &aclLevels) if err != nil { // Disable Allow and Deny logging to ensure idempotency. 
diff --git a/go-controller/pkg/ovn/ovn_test.go b/go-controller/pkg/ovn/ovn_test.go index 0fabcad241..53f5caacea 100644 --- a/go-controller/pkg/ovn/ovn_test.go +++ b/go-controller/pkg/ovn/ovn_test.go @@ -236,7 +236,6 @@ func NewOvnController(ovnClient *util.OVNMasterClientset, wf *factory.WatchFacto false, // sctp support false, // multicast support true, // templates support - true, // acl logging enabled ) if err != nil { return nil, err @@ -342,7 +341,6 @@ func (o *FakeOVN) NewSecondaryNetworkController(netattachdef *nettypes.NetworkAt false, // sctp support false, // multicast support true, // templates support - true, // acl logging enabled ) if err != nil { return err From e15c43a89c483b6c03a9f1dd5a0195288b02033f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 17 May 2023 10:53:06 -0500 Subject: [PATCH 51/90] ovn: assume Logical Datapath Groups are always supported DP Groups have existed since 2020, defaulted to enabled in OVN 21.09, and the option to disable them was removed in OVN 22.09. Signed-off-by: Dan Williams --- .../network_controller_manager.go | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/go-controller/pkg/network-controller-manager/network_controller_manager.go b/go-controller/pkg/network-controller-manager/network_controller_manager.go index e5a7d196cb..9463df5a7a 100644 --- a/go-controller/pkg/network-controller-manager/network_controller_manager.go +++ b/go-controller/pkg/network-controller-manager/network_controller_manager.go @@ -249,21 +249,6 @@ func (cm *networkControllerManager) configureSvcTemplateSupport() { } } -// enableOVNLogicalDataPathGroups sets an OVN flag to enable logical datapath -// groups on OVN 20.12 and later. The option is ignored if OVN doesn't -// understand it. Logical datapath groups reduce the size of the southbound -// database in large clusters. ovn-controllers should be upgraded to a version -// that supports them before the option is turned on by the master. -func (cm *networkControllerManager) enableOVNLogicalDataPathGroups() error { - nbGlobal := nbdb.NBGlobal{ - Options: map[string]string{"use_logical_dp_groups": "true"}, - } - if err := libovsdbops.UpdateNBGlobalSetOptions(cm.nbClient, &nbGlobal); err != nil { - return fmt.Errorf("failed to set NB global option to enable logical datapath groups: %v", err) - } - return nil -} - func (cm *networkControllerManager) configureMetrics(stopChan <-chan struct{}) { metrics.RegisterMasterPerformance(cm.nbClient) metrics.RegisterMasterFunctional() @@ -358,11 +343,6 @@ func (cm *networkControllerManager) Start(ctx context.Context) error { return nil } - err = cm.enableOVNLogicalDataPathGroups() - if err != nil { - return err - } - if config.Metrics.EnableConfigDuration { // with k=10, // for a cluster with 10 nodes, measurement of 1 in every 100 requests From 00c044b6c205f35752529527a729221b89f2f755 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 17 May 2023 10:03:10 -0500 Subject: [PATCH 52/90] network-controller: assume multicast/IGMP_Group is supported by OVN OVN has multicast support since 2.12 long long ago; we don't need to check the Southbound for it anymore. Multicast will still be disabled if turned off via config or for secondary networks. 
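
For reference, a hedged sketch of the probe that the hunk below removes: support used to be inferred by listing the IGMP_Group table and treating a failure as lack of support. The real code called util.RunOVNSbctl; exec.Command here only keeps the example self-contained:

package main

import (
	"fmt"
	"os/exec"
)

// igmpGroupSupported shows the old detection pattern this patch drops in
// favour of trusting config.EnableMulticast directly.
func igmpGroupSupported() bool {
	err := exec.Command("ovn-sbctl", "--columns=_uuid", "list", "IGMP_Group").Run()
	return err == nil
}

func main() {
	fmt.Println("IGMP_Group supported:", igmpGroupSupported())
}
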
Signed-off-by: Dan Williams --- .../network_controller_manager.go | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/go-controller/pkg/network-controller-manager/network_controller_manager.go b/go-controller/pkg/network-controller-manager/network_controller_manager.go index 9463df5a7a..c5f97cc555 100644 --- a/go-controller/pkg/network-controller-manager/network_controller_manager.go +++ b/go-controller/pkg/network-controller-manager/network_controller_manager.go @@ -199,8 +199,9 @@ func NewNetworkControllerManager(ovnClient *util.OVNClientset, identity string, sbClient: libovsdbOvnSBClient, podRecorder: &podRecorder, - wg: wg, - identity: identity, + wg: wg, + identity: identity, + multicastSupport: config.EnableMulticast, } var err error @@ -228,17 +229,6 @@ func (cm *networkControllerManager) configureSCTPSupport() error { return nil } -func (cm *networkControllerManager) configureMulticastSupport() { - cm.multicastSupport = config.EnableMulticast - if cm.multicastSupport { - if _, _, err := util.RunOVNSbctl("--columns=_uuid", "list", "IGMP_Group"); err != nil { - klog.Warningf("Multicast support enabled, however version of OVN in use does not support IGMP Group. " + - "Disabling Multicast Support") - cm.multicastSupport = false - } - } -} - func (cm *networkControllerManager) configureSvcTemplateSupport() { if _, _, err := util.RunOVNNbctl("--columns=_uuid", "list", "Chassis_Template_Var"); err != nil { klog.Warningf("Version of OVN in use does not support Chassis_Template_Var. " + @@ -335,7 +325,6 @@ func (cm *networkControllerManager) Start(ctx context.Context) error { return err } - cm.configureMulticastSupport() cm.configureSvcTemplateSupport() err = cm.createACLLoggingMeter() From ea40ca942242d3a82a27c7f95224f190602b71c2 Mon Sep 17 00:00:00 2001 From: Patryk Diak Date: Thu, 4 May 2023 15:28:49 +0200 Subject: [PATCH 53/90] Allow a scenario where there is only IPv4 default GW specified in a dual-stack cluster Previously it was allowed to have the IPv6 default GW without having the IPv4 one, we should allow the same in the oposite scenario. 
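
A minimal sketch of the relaxed check (reconcileGWIfaces is a hypothetical name mirroring the hunk below): a gateway interface found for only one family is accepted, and the error is reserved for the case where both families report different interfaces:

package main

import "fmt"

// reconcileGWIfaces accepts an interface from either family and only errors
// when both families report a gateway interface and the names disagree.
func reconcileGWIfaces(v4Iface, v6Iface string) (string, error) {
	if v4Iface == "" {
		return v6Iface, nil
	}
	if v6Iface != "" && v4Iface != v6Iface {
		return "", fmt.Errorf("multiple gateway interfaces detected: %s %s", v4Iface, v6Iface)
	}
	return v4Iface, nil
}

func main() {
	iface, err := reconcileGWIfaces("eth0", "") // IPv4-only default gateway is now valid
	fmt.Println(iface, err)
}
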
Signed-off-by: Patryk Diak --- go-controller/pkg/node/helper_linux.go | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/go-controller/pkg/node/helper_linux.go b/go-controller/pkg/node/helper_linux.go index d3e0204207..5f18719f7e 100644 --- a/go-controller/pkg/node/helper_linux.go +++ b/go-controller/pkg/node/helper_linux.go @@ -37,14 +37,11 @@ func getDefaultGatewayInterfaceDetails(gwIface string) (string, []net.IP, error) return "", gatewayIPs, err } - // validate that both IP Families use the same interface for the gateway - // if we found a v4 default gw inf then we should expect v6 default gw inf also - if intfName != "" && intfIPv6Name == "" { - return "", nil, fmt.Errorf("failed to find IPV6 default gateway from interface %q", gwIface) - } + // if there is an interface specified for both IP families + // validate they use the same one if intfName == "" { intfName = intfIPv6Name - } else if intfName != intfIPv6Name { + } else if (len(intfName) > 0 && len(intfIPv6Name) > 0) && intfName != intfIPv6Name { return "", nil, fmt.Errorf("multiple gateway interfaces detected: %s %s", intfName, intfIPv6Name) } gatewayIPs = append(gatewayIPs, gw) From a2eee186479b27a665f3690de6dc7e9eb77d3301 Mon Sep 17 00:00:00 2001 From: Patryk Diak Date: Thu, 4 May 2023 15:33:22 +0200 Subject: [PATCH 54/90] getDefaultGatewayInterfaceDetails: do not return empty gateway IPs Only add the gw IP if it is not empty, the previous approach resulted in adding a `` IP when there was no default GW: next-hops":["","fe80::5054:ff:fedf:631e"] Signed-off-by: Patryk Diak --- go-controller/pkg/node/helper_linux.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/go-controller/pkg/node/helper_linux.go b/go-controller/pkg/node/helper_linux.go index 5f18719f7e..fafcf9ec4b 100644 --- a/go-controller/pkg/node/helper_linux.go +++ b/go-controller/pkg/node/helper_linux.go @@ -28,7 +28,11 @@ func getDefaultGatewayInterfaceDetails(gwIface string) (string, []net.IP, error) return "", gatewayIPs, err } intfName = intfIPv4Name - gatewayIPs = append(gatewayIPs, gw) + + // only add the GW IP if it is specified + if len(gw) != 0 { + gatewayIPs = append(gatewayIPs, gw) + } } if config.IPv6Mode { @@ -44,7 +48,11 @@ func getDefaultGatewayInterfaceDetails(gwIface string) (string, []net.IP, error) } else if (len(intfName) > 0 && len(intfIPv6Name) > 0) && intfName != intfIPv6Name { return "", nil, fmt.Errorf("multiple gateway interfaces detected: %s %s", intfName, intfIPv6Name) } - gatewayIPs = append(gatewayIPs, gw) + + // only add the GW IP if it is specified + if len(gw) != 0 { + gatewayIPs = append(gatewayIPs, gw) + } } return intfName, gatewayIPs, nil From d5dddfb875b31037c100440cef1053c8e9738d27 Mon Sep 17 00:00:00 2001 From: Patryk Diak Date: Thu, 4 May 2023 15:35:55 +0200 Subject: [PATCH 55/90] Add unit test for getDefaultGatewayInterfaceByFamily and getDefaultGatewayInterfaceDetails This commit additionally refactors getDefaultGatewayInterfaceDetails to make it independent of the global config. 
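(Hypothetical example of what the refactor enables, not one of the tests added by this patch: because the address-family modes are now plain parameters, a table-driven test can exercise single- and dual-stack behaviour without mutating the global config.IPv4Mode/config.IPv6Mode. Unlike the mocked tests in helper_linux_test.go, this sketch would query the host's real routing table, so it only logs when no default route is present.)

    package node

    import "testing"

    func TestGatewayDetailsPerFamilySketch(t *testing.T) {
    	cases := []struct {
    		name     string
    		ipV4Mode bool
    		ipV6Mode bool
    	}{
    		{name: "ipv4 only", ipV4Mode: true},
    		{name: "ipv6 only", ipV6Mode: true},
    		{name: "dual stack", ipV4Mode: true, ipV6Mode: true},
    	}
    	for _, tc := range cases {
    		t.Run(tc.name, func(t *testing.T) {
    			// Empty gwIface: use whichever interface holds the default route(s).
    			_, _, err := getDefaultGatewayInterfaceDetails("", tc.ipV4Mode, tc.ipV6Mode)
    			if err != nil {
    				t.Logf("no usable default route on this host: %v", err)
    			}
    		})
    	}
    }
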
Signed-off-by: Patryk Diak --- go-controller/pkg/node/gateway_init.go | 2 +- go-controller/pkg/node/helper_linux.go | 7 +- go-controller/pkg/node/helper_linux_test.go | 271 ++++++++++++++++++++ 3 files changed, 275 insertions(+), 5 deletions(-) diff --git a/go-controller/pkg/node/gateway_init.go b/go-controller/pkg/node/gateway_init.go index 06f4795403..de4fbfa9e6 100644 --- a/go-controller/pkg/node/gateway_init.go +++ b/go-controller/pkg/node/gateway_init.go @@ -187,7 +187,7 @@ func getGatewayNextHops() ([]net.IP, string, error) { } if needIPv4NextHop || needIPv6NextHop || gatewayIntf == "" { - defaultGatewayIntf, defaultGatewayNextHops, err := getDefaultGatewayInterfaceDetails(gatewayIntf) + defaultGatewayIntf, defaultGatewayNextHops, err := getDefaultGatewayInterfaceDetails(gatewayIntf, config.IPv4Mode, config.IPv6Mode) if err != nil { return nil, "", err } diff --git a/go-controller/pkg/node/helper_linux.go b/go-controller/pkg/node/helper_linux.go index fafcf9ec4b..ea86b10814 100644 --- a/go-controller/pkg/node/helper_linux.go +++ b/go-controller/pkg/node/helper_linux.go @@ -7,7 +7,6 @@ import ( "fmt" "net" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" "github.com/pkg/errors" "github.com/vishvananda/netlink" @@ -18,11 +17,11 @@ import ( // which the default gateway (for route to 0.0.0.0) is configured. // optionally pass the pre-determined gateway interface // It also returns the default gateways themselves. -func getDefaultGatewayInterfaceDetails(gwIface string) (string, []net.IP, error) { +func getDefaultGatewayInterfaceDetails(gwIface string, ipV4Mode, ipV6Mode bool) (string, []net.IP, error) { var intfName string var gatewayIPs []net.IP - if config.IPv4Mode { + if ipV4Mode { intfIPv4Name, gw, err := getDefaultGatewayInterfaceByFamily(netlink.FAMILY_V4, gwIface) if err != nil { return "", gatewayIPs, err @@ -35,7 +34,7 @@ func getDefaultGatewayInterfaceDetails(gwIface string) (string, []net.IP, error) } } - if config.IPv6Mode { + if ipV6Mode { intfIPv6Name, gw, err := getDefaultGatewayInterfaceByFamily(netlink.FAMILY_V6, gwIface) if err != nil { return "", gatewayIPs, err diff --git a/go-controller/pkg/node/helper_linux_test.go b/go-controller/pkg/node/helper_linux_test.go index ebf2088418..f5d2d2ae36 100644 --- a/go-controller/pkg/node/helper_linux_test.go +++ b/go-controller/pkg/node/helper_linux_test.go @@ -1,9 +1,16 @@ package node import ( + "fmt" + "net" "reflect" "testing" + ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" + netlink_mocks "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/mocks/github.com/vishvananda/netlink" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + util_mocks "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/mocks" + "github.com/stretchr/testify/assert" "github.com/vishvananda/netlink" ) @@ -174,3 +181,267 @@ func TestFilterRoutesByIfIndex(t *testing.T) { } } } + +func TestGetDefaultGatewayInterfaceByFamily(t *testing.T) { + mockNetLinkOps := new(util_mocks.NetLinkOps) + mockLink := new(netlink_mocks.Link) + // below sets the `netLinkOps` in util/net_linux.go to a mock instance for purpose of unit tests execution + util.SetNetLinkOpMockInst(mockNetLinkOps) + defer util.ResetNetLinkOpMockInst() + + defaultIf := "testInterface" + customIf := "customTestInterface" + defaultGWIP := ovntest.MustParseIP("1.1.1.1") + customGWIP := ovntest.MustParseIP("fd99::1") + + tests := []struct { + desc string + ipFamily int + gwIface string 
+ expIntfName string + expGatewayIP net.IP + expErr bool + netLinkOpsMockHelper []ovntest.TestifyMockHelper + linkMockHelper []ovntest.TestifyMockHelper + }{ + { + desc: "no default routes returns empty values", + ipFamily: netlink.FAMILY_V4, + expGatewayIP: net.IP{}, + netLinkOpsMockHelper: []ovntest.TestifyMockHelper{ + {OnCallMethodName: "RouteListFiltered", OnCallMethodArgType: []string{"int", "*netlink.Route", "uint64"}, RetArgList: []interface{}{[]netlink.Route{}, nil}}, + }, + }, + { + desc: "first default route is used when no gw is specified", + gwIface: "", + ipFamily: netlink.FAMILY_V4, + expIntfName: defaultIf, + expGatewayIP: defaultGWIP, + netLinkOpsMockHelper: []ovntest.TestifyMockHelper{ + + {OnCallMethodName: "RouteListFiltered", OnCallMethodArgType: []string{"int", "*netlink.Route", "uint64"}, RetArgList: []interface{}{[]netlink.Route{ + { + LinkIndex: 2, + Gw: defaultGWIP, + }, + { + LinkIndex: 9, + Gw: ovntest.MustParseIP("3.3.3.3"), + }, + }, nil}}, + {OnCallMethodName: "LinkByIndex", OnCallMethodArgType: []string{"int"}, RetArgList: []interface{}{mockLink, nil}}, + }, + linkMockHelper: []ovntest.TestifyMockHelper{ + {OnCallMethodName: "Attrs", OnCallMethodArgType: []string{}, RetArgList: []interface{}{&netlink.LinkAttrs{Name: defaultIf}}}, + {OnCallMethodName: "Attrs", OnCallMethodArgType: []string{}, RetArgList: []interface{}{&netlink.LinkAttrs{Name: defaultIf}}}, + }, + }, + { + desc: "only routes from the provided GW are considered", + gwIface: customIf, + ipFamily: netlink.FAMILY_V6, + expIntfName: customIf, + expGatewayIP: customGWIP, + netLinkOpsMockHelper: []ovntest.TestifyMockHelper{ + {OnCallMethodName: "LinkByName", OnCallMethodArgType: []string{"string", "string"}, RetArgList: []interface{}{mockLink, nil}}, + {OnCallMethodName: "RouteListFiltered", OnCallMethodArgType: []string{"int", "*netlink.Route", "uint64"}, RetArgList: []interface{}{[]netlink.Route{ + { + LinkIndex: 1, + Gw: defaultGWIP, + }, + { + LinkIndex: 2, + Gw: customGWIP, + }, + }, nil}}, + {OnCallMethodName: "LinkByIndex", OnCallMethodArgType: []string{"int"}, RetArgList: []interface{}{mockLink, nil}}, + }, + linkMockHelper: []ovntest.TestifyMockHelper{ + {OnCallMethodName: "Attrs", OnCallMethodArgType: []string{}, RetArgList: []interface{}{&netlink.LinkAttrs{Index: 2}}}, + {OnCallMethodName: "Attrs", OnCallMethodArgType: []string{}, RetArgList: []interface{}{&netlink.LinkAttrs{Index: 2}}}, + {OnCallMethodName: "Attrs", OnCallMethodArgType: []string{}, RetArgList: []interface{}{&netlink.LinkAttrs{Name: customIf}}}, + {OnCallMethodName: "Attrs", OnCallMethodArgType: []string{}, RetArgList: []interface{}{&netlink.LinkAttrs{Name: customIf}}}, + }, + }, + } + for i, tc := range tests { + t.Run(fmt.Sprintf("%d:%s", i, tc.desc), func(t *testing.T) { + ovntest.ProcessMockFnList(&mockNetLinkOps.Mock, tc.netLinkOpsMockHelper) + ovntest.ProcessMockFnList(&mockLink.Mock, tc.linkMockHelper) + intfName, gwIP, err := getDefaultGatewayInterfaceByFamily(tc.ipFamily, tc.gwIface) + if intfName != tc.expIntfName { + t.Fatalf("TestGetDefaultGatewayInterfaceByFamily(%d): Default gateway interface should be '%v' but got '%v'", + i, tc.expIntfName, intfName) + } + if !reflect.DeepEqual(tc.expGatewayIP, gwIP) { + t.Fatalf("TestGetDefaultGatewayInterfaceByFamily(%d): Default gateway IP should be '%v' but got '%v'", + i, tc.expGatewayIP, gwIP) + } + + t.Log(err) + if tc.expErr { + assert.Error(t, err) + } else { + assert.Nil(t, err) + } + mockNetLinkOps.AssertExpectations(t) + mockLink.AssertExpectations(t) + }) + } 
+} + +func TestGetDefaultGatewayInterfaceDetails(t *testing.T) { + mockNetLinkOps := new(util_mocks.NetLinkOps) + mockLink := new(netlink_mocks.Link) + // below sets the `netLinkOps` in util/net_linux.go to a mock instance for purpose of unit tests execution + util.SetNetLinkOpMockInst(mockNetLinkOps) + defer util.ResetNetLinkOpMockInst() + + defaultIf := "testInterface" + defaultGWIPv4 := ovntest.MustParseIP("1.1.1.1") + defaultGWIPv6 := ovntest.MustParseIP("fd99::1") + + tests := []struct { + desc string + ipV4Mode bool + ipV6Mode bool + gwIface string + expIntfName string + expGatewayIPs []net.IP + expErr bool + netLinkOpsMockHelper []ovntest.TestifyMockHelper + linkMockHelper []ovntest.TestifyMockHelper + }{ + { + desc: "no default routes returns empty values", + ipV4Mode: true, + netLinkOpsMockHelper: []ovntest.TestifyMockHelper{ + {OnCallMethodName: "RouteListFiltered", OnCallMethodArgType: []string{"int", "*netlink.Route", "uint64"}, RetArgList: []interface{}{[]netlink.Route{}, nil}}, + }, + }, + { + desc: "only ipv4 GW set in dual-stack returns valid interface and one gw", + ipV4Mode: true, + ipV6Mode: true, + expGatewayIPs: []net.IP{defaultGWIPv4}, + expIntfName: defaultIf, + netLinkOpsMockHelper: []ovntest.TestifyMockHelper{ + {OnCallMethodName: "RouteListFiltered", OnCallMethodArgType: []string{"int", "*netlink.Route", "uint64"}, RetArgList: []interface{}{[]netlink.Route{ + { + LinkIndex: 1, + Gw: defaultGWIPv4, + }, + }, nil}}, + {OnCallMethodName: "RouteListFiltered", OnCallMethodArgType: []string{"int", "*netlink.Route", "uint64"}, RetArgList: []interface{}{[]netlink.Route{}, nil}}, + {OnCallMethodName: "LinkByIndex", OnCallMethodArgType: []string{"int"}, RetArgList: []interface{}{mockLink, nil}}, + }, + linkMockHelper: []ovntest.TestifyMockHelper{ + {OnCallMethodName: "Attrs", OnCallMethodArgType: []string{}, RetArgList: []interface{}{&netlink.LinkAttrs{Name: defaultIf}}}, + {OnCallMethodName: "Attrs", OnCallMethodArgType: []string{}, RetArgList: []interface{}{&netlink.LinkAttrs{Name: defaultIf}}}, + }, + }, + { + desc: "only ipv6 GW set in dual-stack returns valid interface and one gw", + ipV4Mode: true, + ipV6Mode: true, + expGatewayIPs: []net.IP{defaultGWIPv6}, + expIntfName: defaultIf, + netLinkOpsMockHelper: []ovntest.TestifyMockHelper{ + {OnCallMethodName: "RouteListFiltered", OnCallMethodArgType: []string{"int", "*netlink.Route", "uint64"}, RetArgList: []interface{}{[]netlink.Route{}, nil}}, + {OnCallMethodName: "RouteListFiltered", OnCallMethodArgType: []string{"int", "*netlink.Route", "uint64"}, RetArgList: []interface{}{[]netlink.Route{ + { + LinkIndex: 1, + Gw: defaultGWIPv6, + }, + }, nil}}, + {OnCallMethodName: "LinkByIndex", OnCallMethodArgType: []string{"int"}, RetArgList: []interface{}{mockLink, nil}}, + }, + linkMockHelper: []ovntest.TestifyMockHelper{ + {OnCallMethodName: "Attrs", OnCallMethodArgType: []string{}, RetArgList: []interface{}{&netlink.LinkAttrs{Name: defaultIf}}}, + {OnCallMethodName: "Attrs", OnCallMethodArgType: []string{}, RetArgList: []interface{}{&netlink.LinkAttrs{Name: defaultIf}}}, + }, + }, + { + desc: "in dual-stack the function fails if the default GWs are on different interfaces", + ipV4Mode: true, + ipV6Mode: true, + expErr: true, + netLinkOpsMockHelper: []ovntest.TestifyMockHelper{ + {OnCallMethodName: "RouteListFiltered", OnCallMethodArgType: []string{"int", "*netlink.Route", "uint64"}, RetArgList: []interface{}{[]netlink.Route{ + { + LinkIndex: 1, + Gw: defaultGWIPv4, + }, + }, nil}}, + {OnCallMethodName: "RouteListFiltered", 
OnCallMethodArgType: []string{"int", "*netlink.Route", "uint64"}, RetArgList: []interface{}{[]netlink.Route{ + { + LinkIndex: 2, + Gw: defaultGWIPv6, + }, + }, nil}}, + {OnCallMethodName: "LinkByIndex", OnCallMethodArgType: []string{"int"}, RetArgList: []interface{}{mockLink, nil}}, + {OnCallMethodName: "LinkByIndex", OnCallMethodArgType: []string{"int"}, RetArgList: []interface{}{mockLink, nil}}, + }, + linkMockHelper: []ovntest.TestifyMockHelper{ + {OnCallMethodName: "Attrs", OnCallMethodArgType: []string{}, RetArgList: []interface{}{&netlink.LinkAttrs{Name: defaultIf}}}, + {OnCallMethodName: "Attrs", OnCallMethodArgType: []string{}, RetArgList: []interface{}{&netlink.LinkAttrs{Name: defaultIf}}}, + {OnCallMethodName: "Attrs", OnCallMethodArgType: []string{}, RetArgList: []interface{}{&netlink.LinkAttrs{Name: "invalidInterface"}}}, + {OnCallMethodName: "Attrs", OnCallMethodArgType: []string{}, RetArgList: []interface{}{&netlink.LinkAttrs{Name: "invalidInterface"}}}, + }, + }, + { + desc: "in dual-stack the function returns both GW ips", + ipV4Mode: true, + ipV6Mode: true, + expGatewayIPs: []net.IP{defaultGWIPv4, defaultGWIPv6}, + expIntfName: defaultIf, + netLinkOpsMockHelper: []ovntest.TestifyMockHelper{ + {OnCallMethodName: "RouteListFiltered", OnCallMethodArgType: []string{"int", "*netlink.Route", "uint64"}, RetArgList: []interface{}{[]netlink.Route{ + { + LinkIndex: 1, + Gw: defaultGWIPv4, + }, + }, nil}}, + {OnCallMethodName: "RouteListFiltered", OnCallMethodArgType: []string{"int", "*netlink.Route", "uint64"}, RetArgList: []interface{}{[]netlink.Route{ + { + LinkIndex: 1, + Gw: defaultGWIPv6, + }, + }, nil}}, + {OnCallMethodName: "LinkByIndex", OnCallMethodArgType: []string{"int"}, RetArgList: []interface{}{mockLink, nil}}, + {OnCallMethodName: "LinkByIndex", OnCallMethodArgType: []string{"int"}, RetArgList: []interface{}{mockLink, nil}}, + }, + linkMockHelper: []ovntest.TestifyMockHelper{ + {OnCallMethodName: "Attrs", OnCallMethodArgType: []string{}, RetArgList: []interface{}{&netlink.LinkAttrs{Name: defaultIf}}}, + {OnCallMethodName: "Attrs", OnCallMethodArgType: []string{}, RetArgList: []interface{}{&netlink.LinkAttrs{Name: defaultIf}}}, + {OnCallMethodName: "Attrs", OnCallMethodArgType: []string{}, RetArgList: []interface{}{&netlink.LinkAttrs{Name: defaultIf}}}, + {OnCallMethodName: "Attrs", OnCallMethodArgType: []string{}, RetArgList: []interface{}{&netlink.LinkAttrs{Name: defaultIf}}}, + }, + }, + } + for i, tc := range tests { + t.Run(fmt.Sprintf("%d:%s", i, tc.desc), func(t *testing.T) { + ovntest.ProcessMockFnList(&mockNetLinkOps.Mock, tc.netLinkOpsMockHelper) + ovntest.ProcessMockFnList(&mockLink.Mock, tc.linkMockHelper) + intfName, gwIPs, err := getDefaultGatewayInterfaceDetails(tc.gwIface, tc.ipV4Mode, tc.ipV6Mode) + if intfName != tc.expIntfName { + t.Fatalf("TestGetDefaultGatewayInterfaceDetails(%d): Default gateway interface should be '%v' but got '%v'", + i, tc.expIntfName, intfName) + } + if !reflect.DeepEqual(tc.expGatewayIPs, gwIPs) { + t.Fatalf("TestGetDefaultGatewayInterfaceDetails(%d): Default gateway IPs should be '%v' but got '%v'", + i, tc.expGatewayIPs, gwIPs) + } + + t.Log(err) + if tc.expErr { + assert.Error(t, err) + } else { + assert.Nil(t, err) + } + mockNetLinkOps.AssertExpectations(t) + mockLink.AssertExpectations(t) + }) + } +} From 0809d861c831931bcc71e17bacab9d93fdb91bfa Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 16 May 2023 13:51:55 -0500 Subject: [PATCH 56/90] Always use OVN's iface-id-ver option in non-DPUHost mode iface-id-ver was 
added in OVN 21.09; all ovn-kubernetes users should *definitely* be using a much newer OVN. So we can remove all the complicated logic around detecting if OVN supports it, and just assume we can always use it unless we're in DPUHost mode. Signed-off-by: Dan Williams --- go-controller/pkg/cni/cni.go | 8 ++-- go-controller/pkg/cni/cniserver.go | 21 +-------- go-controller/pkg/cni/cniserver_test.go | 4 +- go-controller/pkg/cni/ovs.go | 9 ++-- go-controller/pkg/cni/types.go | 4 +- go-controller/pkg/cni/utils.go | 5 +-- go-controller/pkg/cni/utils_test.go | 20 ++------- go-controller/pkg/config/config.go | 11 +++-- .../node_network_controller_manager.go | 39 +++-------------- .../node/base_node_network_controller_dpu.go | 11 ++--- .../base_node_network_controller_dpu_test.go | 2 +- .../node/default_node_network_controller.go | 43 ++++--------------- .../pkg/node/gateway_init_linux_test.go | 2 +- go-controller/pkg/node/ovn_test.go | 2 +- go-controller/pkg/util/ovn.go | 19 -------- 15 files changed, 49 insertions(+), 151 deletions(-) diff --git a/go-controller/pkg/cni/cni.go b/go-controller/pkg/cni/cni.go index eef9acdbd7..f5af607b0c 100644 --- a/go-controller/pkg/cni/cni.go +++ b/go-controller/pkg/cni/cni.go @@ -98,7 +98,7 @@ func (pr *PodRequest) checkOrUpdatePodUID(pod *kapi.Pod) error { return nil } -func (pr *PodRequest) cmdAdd(kubeAuth *KubeAPIAuth, clientset *ClientSet, useOVSExternalIDs bool) (*Response, error) { +func (pr *PodRequest) cmdAdd(kubeAuth *KubeAPIAuth, clientset *ClientSet) (*Response, error) { namespace := pr.PodNamespace podName := pr.PodName if namespace == "" || podName == "" { @@ -137,7 +137,7 @@ func (pr *PodRequest) cmdAdd(kubeAuth *KubeAPIAuth, clientset *ClientSet, useOVS return nil, err } - podInterfaceInfo, err := PodAnnotation2PodInfo(annotations, podNADAnnotation, useOVSExternalIDs, pr.PodUID, netdevName, + podInterfaceInfo, err := PodAnnotation2PodInfo(annotations, podNADAnnotation, pr.PodUID, netdevName, pr.nadName, pr.netName, pr.CNIConf.MTU) if err != nil { return nil, err @@ -247,7 +247,7 @@ func (pr *PodRequest) cmdCheck() error { // Argument '*PodRequest' encapsulates all the necessary information // kclient is passed in so that clientset can be reused from the server // Return value is the actual bytes to be sent back without further processing. -func HandlePodRequest(request *PodRequest, clientset *ClientSet, useOVSExternalIDs bool, kubeAuth *KubeAPIAuth) ([]byte, error) { +func HandlePodRequest(request *PodRequest, clientset *ClientSet, kubeAuth *KubeAPIAuth) ([]byte, error) { var result, resultForLogging []byte var response *Response var err, err1 error @@ -255,7 +255,7 @@ func HandlePodRequest(request *PodRequest, clientset *ClientSet, useOVSExternalI klog.Infof("%s %s starting CNI request %+v", request, request.Command, request) switch request.Command { case CNIAdd: - response, err = request.cmdAdd(kubeAuth, clientset, useOVSExternalIDs) + response, err = request.cmdAdd(kubeAuth, clientset) case CNIDel: response, err = request.cmdDel(clientset) case CNICheck: diff --git a/go-controller/pkg/cni/cniserver.go b/go-controller/pkg/cni/cniserver.go index 0413211c0f..09aab11719 100644 --- a/go-controller/pkg/cni/cniserver.go +++ b/go-controller/pkg/cni/cniserver.go @@ -8,7 +8,6 @@ import ( "io/ioutil" "net/http" "strings" - "sync/atomic" "time" "github.com/gorilla/mux" @@ -49,23 +48,16 @@ import ( // started. 
// NewCNIServer creates and returns a new Server object which will listen on a socket in the given path -func NewCNIServer(useOVSExternalIDs bool, factory factory.NodeWatchFactory, kclient kubernetes.Interface) (*Server, error) { +func NewCNIServer(factory factory.NodeWatchFactory, kclient kubernetes.Interface) (*Server, error) { if config.OvnKubeNode.Mode == types.NodeModeDPU { return nil, fmt.Errorf("unsupported ovnkube-node mode for CNI server: %s", config.OvnKubeNode.Mode) } router := mux.NewRouter() - // we use atomic lib to store port binding mode state, so use int32 to represent bool - var ovnPortBinding int32 - if useOVSExternalIDs { - ovnPortBinding = 1 - } - s := &Server{ Server: http.Server{ Handler: router, }, - useOVSExternalIDs: ovnPortBinding, clientSet: &ClientSet{ podLister: corev1listers.NewPodLister(factory.LocalPodInformer().GetIndexer()), kclient: kclient, @@ -220,11 +212,7 @@ func (s *Server) handleCNIRequest(r *http.Request) ([]byte, error) { } defer req.cancel() - useOVSExternalIDs := false - if atomic.LoadInt32(&s.useOVSExternalIDs) > 0 { - useOVSExternalIDs = true - } - result, err := s.handlePodRequestFunc(req, s.clientSet, useOVSExternalIDs, s.kubeAuth) + result, err := s.handlePodRequestFunc(req, s.clientSet, s.kubeAuth) if err != nil { // Prefix error with request information for easier debugging return nil, fmt.Errorf("%s %v", req, err) @@ -249,8 +237,3 @@ func (s *Server) handleCNIMetrics(w http.ResponseWriter, r *http.Request) { klog.Warningf("Error writing %s HTTP response for metrics post", err) } } - -func (s *Server) EnableOVNPortUpSupport() { - atomic.StoreInt32(&s.useOVSExternalIDs, 1) - klog.Info("OVN Port Binding support now enabled in CNI Server") -} diff --git a/go-controller/pkg/cni/cniserver_test.go b/go-controller/pkg/cni/cniserver_test.go index 6970dfba6a..8d03a6e35a 100644 --- a/go-controller/pkg/cni/cniserver_test.go +++ b/go-controller/pkg/cni/cniserver_test.go @@ -47,7 +47,7 @@ func clientDoCNI(t *testing.T, client *http.Client, req *Request) ([]byte, int) var expectedResult cnitypes.Result -func serverHandleCNI(request *PodRequest, clientset *ClientSet, useOVSExternalIDs bool, kubeAuth *KubeAPIAuth) ([]byte, error) { +func serverHandleCNI(request *PodRequest, clientset *ClientSet, kubeAuth *KubeAPIAuth) ([]byte, error) { if request.Command == CNIAdd { return json.Marshal(&expectedResult) } else if request.Command == CNIDel || request.Command == CNIUpdate || request.Command == CNICheck { @@ -89,7 +89,7 @@ func TestCNIServer(t *testing.T) { t.Fatalf("failed to start watch factory: %v", err) } - s, err := NewCNIServer(false, wf, fakeClient) + s, err := NewCNIServer(wf, fakeClient) if err != nil { t.Fatalf("error creating CNI server: %v", err) } diff --git a/go-controller/pkg/cni/ovs.go b/go-controller/pkg/cni/ovs.go index b5fcef1e49..a25413c955 100644 --- a/go-controller/pkg/cni/ovs.go +++ b/go-controller/pkg/cni/ovs.go @@ -267,9 +267,9 @@ func waitForPodInterface(ctx context.Context, ifInfo *PodInterfaceInfo, var ofPort int var err error - mac := ifInfo.MAC.String() - ifAddrs := ifInfo.IPs - checkExternalIDs := ifInfo.CheckExtIDs + // DPUHost mode can't use OVS external IDs for port-up detection because + // there is no ovn-controller running in DPUHost mode to set port-up + checkExternalIDs := !ifInfo.IsDPUHostMode if checkExternalIDs { detail = " (ovn-installed)" } else { @@ -278,6 +278,9 @@ func waitForPodInterface(ctx context.Context, ifInfo *PodInterfaceInfo, return err } } + + mac := ifInfo.MAC.String() + ifAddrs := ifInfo.IPs for { 
select { case <-ctx.Done(): diff --git a/go-controller/pkg/cni/types.go b/go-controller/pkg/cni/types.go index ac65f0c4a3..f7abcaf921 100644 --- a/go-controller/pkg/cni/types.go +++ b/go-controller/pkg/cni/types.go @@ -46,7 +46,6 @@ type PodInterfaceInfo struct { RoutableMTU int `json:"routable-mtu"` Ingress int64 `json:"ingress"` Egress int64 `json:"egress"` - CheckExtIDs bool `json:"check-external-ids"` IsDPUHostMode bool `json:"is-dpu-host-mode"` PodUID string `json:"pod-uid"` NetdevName string `json:"vf-netdev-name"` @@ -159,7 +158,7 @@ type PodRequest struct { nadName string } -type podRequestFunc func(request *PodRequest, clientset *ClientSet, useOVSExternalIDs bool, kubeAuth *KubeAPIAuth) ([]byte, error) +type podRequestFunc func(request *PodRequest, clientset *ClientSet, kubeAuth *KubeAPIAuth) ([]byte, error) type PodInfoGetter interface { getPod(namespace, name string) (*kapi.Pod, error) @@ -183,7 +182,6 @@ func NewClientSet(kclient kubernetes.Interface, podLister corev1listers.PodListe type Server struct { http.Server handlePodRequestFunc podRequestFunc - useOVSExternalIDs int32 clientSet *ClientSet kubeAuth *KubeAPIAuth } diff --git a/go-controller/pkg/cni/utils.go b/go-controller/pkg/cni/utils.go index a594ec7e91..d85cb54bea 100644 --- a/go-controller/pkg/cni/utils.go +++ b/go-controller/pkg/cni/utils.go @@ -98,7 +98,7 @@ func GetPodWithAnnotations(ctx context.Context, getter PodInfoGetter, } // PodAnnotation2PodInfo creates PodInterfaceInfo from Pod annotations and additional attributes -func PodAnnotation2PodInfo(podAnnotation map[string]string, podNADAnnotation *util.PodAnnotation, checkExtIDs bool, podUID, +func PodAnnotation2PodInfo(podAnnotation map[string]string, podNADAnnotation *util.PodAnnotation, podUID, netdevname, nadName, netName string, mtu int) (*PodInterfaceInfo, error) { var err error // get pod's annotation of the given NAD if it is not available @@ -123,7 +123,6 @@ func PodAnnotation2PodInfo(podAnnotation map[string]string, podNADAnnotation *ut RoutableMTU: config.Default.RoutableMTU, // TBD, configurable for secondary network? Ingress: ingress, Egress: egress, - CheckExtIDs: checkExtIDs, IsDPUHostMode: config.OvnKubeNode.Mode == types.NodeModeDPUHost, PodUID: podUID, NetdevName: netdevname, @@ -134,7 +133,7 @@ func PodAnnotation2PodInfo(podAnnotation map[string]string, podNADAnnotation *ut return podInterfaceInfo, nil } -//START taken from https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/types/pod_update.go +// START taken from https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/types/pod_update.go const ( ConfigSourceAnnotationKey = "kubernetes.io/config.source" // ApiserverSource identifies updates from Kubernetes API Server. 
diff --git a/go-controller/pkg/cni/utils_test.go b/go-controller/pkg/cni/utils_test.go index aa40fa922d..d353f76b06 100644 --- a/go-controller/pkg/cni/utils_test.go +++ b/go-controller/pkg/cni/utils_test.go @@ -254,40 +254,28 @@ var _ = Describe("CNI Utils tests", func() { podUID := "4d06bae8-9c38-41f6-945c-f92320e782e4" It("Creates PodInterfaceInfo in NodeModeFull mode", func() { config.OvnKubeNode.Mode = ovntypes.NodeModeFull - pif, err := PodAnnotation2PodInfo(podAnnot, nil, false, podUID, "", ovntypes.DefaultNetworkName, ovntypes.DefaultNetworkName, config.Default.MTU) + pif, err := PodAnnotation2PodInfo(podAnnot, nil, podUID, "", ovntypes.DefaultNetworkName, ovntypes.DefaultNetworkName, config.Default.MTU) Expect(err).ToNot(HaveOccurred()) Expect(pif.IsDPUHostMode).To(BeFalse()) }) It("Creates PodInterfaceInfo in NodeModeDPUHost mode", func() { config.OvnKubeNode.Mode = ovntypes.NodeModeDPUHost - pif, err := PodAnnotation2PodInfo(podAnnot, nil, false, podUID, "", ovntypes.DefaultNetworkName, ovntypes.DefaultNetworkName, config.Default.MTU) + pif, err := PodAnnotation2PodInfo(podAnnot, nil, podUID, "", ovntypes.DefaultNetworkName, ovntypes.DefaultNetworkName, config.Default.MTU) Expect(err).ToNot(HaveOccurred()) Expect(pif.IsDPUHostMode).To(BeTrue()) }) - It("Creates PodInterfaceInfo with checkExtIDs false", func() { - pif, err := PodAnnotation2PodInfo(podAnnot, nil, false, podUID, "", ovntypes.DefaultNetworkName, ovntypes.DefaultNetworkName, config.Default.MTU) - Expect(err).ToNot(HaveOccurred()) - Expect(pif.CheckExtIDs).To(BeFalse()) - }) - - It("Creates PodInterfaceInfo with checkExtIDs true", func() { - pif, err := PodAnnotation2PodInfo(podAnnot, nil, true, podUID, "", ovntypes.DefaultNetworkName, ovntypes.DefaultNetworkName, config.Default.MTU) - Expect(err).ToNot(HaveOccurred()) - Expect(pif.CheckExtIDs).To(BeTrue()) - }) - It("Creates PodInterfaceInfo with EnableUDPAggregation", func() { config.Default.EnableUDPAggregation = true - pif, err := PodAnnotation2PodInfo(podAnnot, nil, false, podUID, "", ovntypes.DefaultNetworkName, ovntypes.DefaultNetworkName, config.Default.MTU) + pif, err := PodAnnotation2PodInfo(podAnnot, nil, podUID, "", ovntypes.DefaultNetworkName, ovntypes.DefaultNetworkName, config.Default.MTU) Expect(err).ToNot(HaveOccurred()) Expect(pif.EnableUDPAggregation).To(BeTrue()) }) It("Creates PodInterfaceInfo without EnableUDPAggregation", func() { config.Default.EnableUDPAggregation = false - pif, err := PodAnnotation2PodInfo(podAnnot, nil, false, podUID, "", ovntypes.DefaultNetworkName, ovntypes.DefaultNetworkName, config.Default.MTU) + pif, err := PodAnnotation2PodInfo(podAnnot, nil, podUID, "", ovntypes.DefaultNetworkName, ovntypes.DefaultNetworkName, config.Default.MTU) Expect(err).ToNot(HaveOccurred()) Expect(pif.EnableUDPAggregation).To(BeFalse()) }) diff --git a/go-controller/pkg/config/config.go b/go-controller/pkg/config/config.go index 0cb9e8b5f2..a0aa868eff 100644 --- a/go-controller/pkg/config/config.go +++ b/go-controller/pkg/config/config.go @@ -453,7 +453,6 @@ type OvnKubeNodeConfig struct { DPResourceDeviceIdsMap map[string][]string MgmtPortNetdev string `gcfg:"mgmt-port-netdev"` MgmtPortDPResourceName string `gcfg:"mgmt-port-dp-resource-name"` - DisableOVNIfaceIdVer bool `gcfg:"disable-ovn-iface-id-ver"` } // ClusterManagerConfig holds configuration for ovnkube-cluster-manager @@ -522,6 +521,8 @@ var ( initGateways bool // legacy gateway-local CLI option gatewayLocal bool + // legacy disable-ovn-iface-id-ver CLI option + disableOVNIfaceIDVer bool 
) func init() { @@ -1379,11 +1380,9 @@ var OvnKubeNodeFlags = []cli.Flag{ Destination: &cliConfig.OvnKubeNode.MgmtPortDPResourceName, }, &cli.BoolFlag{ - Name: "disable-ovn-iface-id-ver", - Usage: "if iface-id-ver option is not enabled in ovn, set this flag to True " + - "(depends on ovn version, minimal required is 21.09)", - Value: OvnKubeNode.DisableOVNIfaceIdVer, - Destination: &cliConfig.OvnKubeNode.DisableOVNIfaceIdVer, + Name: "disable-ovn-iface-id-ver", + Usage: "Deprecated; iface-id-ver is always enabled", + Destination: &disableOVNIfaceIDVer, }, } diff --git a/go-controller/pkg/network-controller-manager/node_network_controller_manager.go b/go-controller/pkg/network-controller-manager/node_network_controller_manager.go index 2a0b4db0ed..d59f4d9c05 100644 --- a/go-controller/pkg/network-controller-manager/node_network_controller_manager.go +++ b/go-controller/pkg/network-controller-manager/node_network_controller_manager.go @@ -24,13 +24,12 @@ import ( // nodeNetworkControllerManager structure is the object manages all controllers for all networks for ovnkube-node type nodeNetworkControllerManager struct { - name string - client clientset.Interface - Kube kube.Interface - watchFactory factory.NodeWatchFactory - stopChan chan struct{} - recorder record.EventRecorder - isOvnUpEnabled bool + name string + client clientset.Interface + Kube kube.Interface + watchFactory factory.NodeWatchFactory + stopChan chan struct{} + recorder record.EventRecorder defaultNodeNetworkController nad.BaseNetworkController @@ -56,7 +55,7 @@ func (ncm *nodeNetworkControllerManager) CleanupDeletedNetworks(allControllers [ // newCommonNetworkControllerInfo creates and returns the base node network controller info func (ncm *nodeNetworkControllerManager) newCommonNetworkControllerInfo() *node.CommonNodeNetworkControllerInfo { - return node.NewCommonNodeNetworkControllerInfo(ncm.client, ncm.watchFactory, ncm.recorder, ncm.name, ncm.isOvnUpEnabled) + return node.NewCommonNodeNetworkControllerInfo(ncm.client, ncm.watchFactory, ncm.recorder, ncm.name) } // NewNodeNetworkControllerManager creates a new OVN controller manager to manage all the controller for all networks @@ -82,26 +81,6 @@ func NewNodeNetworkControllerManager(ovnClient *util.OVNClientset, wf factory.No return ncm, nil } -// getOVNIfUpCheckMode check if OVN PortBinding.up can be used -func (ncm *nodeNetworkControllerManager) getOVNIfUpCheckMode() error { - // this support is only used when configure Pod's OVS interface, it is not needed in DPU host mode - if config.OvnKubeNode.DisableOVNIfaceIdVer || config.OvnKubeNode.Mode == ovntypes.NodeModeDPUHost { - klog.Infof("'iface-id-ver' is manually disabled, ovn-installed feature can't be used") - ncm.isOvnUpEnabled = false - return nil - } - - isOvnUpEnabled, err := util.GetOVNIfUpCheckMode() - if err != nil { - return err - } - ncm.isOvnUpEnabled = isOvnUpEnabled - if isOvnUpEnabled { - klog.Infof("Detected support for port binding with external IDs") - } - return nil -} - // initDefaultNodeNetworkController creates the controller for default network func (ncm *nodeNetworkControllerManager) initDefaultNodeNetworkController() error { defaultNodeNetworkController, err := node.NewDefaultNodeNetworkController(ncm.newCommonNetworkControllerInfo()) @@ -119,10 +98,6 @@ func (ncm *nodeNetworkControllerManager) initDefaultNodeNetworkController() erro func (ncm *nodeNetworkControllerManager) Start(ctx context.Context) (err error) { klog.Infof("Starting the node network controller manager, Mode: %s", 
config.OvnKubeNode.Mode) - if err = ncm.getOVNIfUpCheckMode(); err != nil { - return err - } - // Initialize OVS exec runner; find OVS binaries that the CNI code uses. // Must happen before calling any OVS exec from pkg/cni to prevent races. // Not required in DPUHost mode as OVS is not present there. diff --git a/go-controller/pkg/node/base_node_network_controller_dpu.go b/go-controller/pkg/node/base_node_network_controller_dpu.go index 7b196dcada..69ec1bb6dc 100644 --- a/go-controller/pkg/node/base_node_network_controller_dpu.go +++ b/go-controller/pkg/node/base_node_network_controller_dpu.go @@ -3,7 +3,6 @@ package node import ( "context" "fmt" - "sync/atomic" "time" kapi "k8s.io/api/core/v1" @@ -44,11 +43,11 @@ func (bnnc *BaseNodeNetworkController) podReadyToAddDPU(pod *kapi.Pod, nadName s return dpuCD } -func (bnnc *BaseNodeNetworkController) addDPUPodForNAD(pod *kapi.Pod, dpuCD *util.DPUConnectionDetails, isOvnUpEnabled bool, +func (bnnc *BaseNodeNetworkController) addDPUPodForNAD(pod *kapi.Pod, dpuCD *util.DPUConnectionDetails, netName, nadName string, getter cni.PodInfoGetter) error { podDesc := fmt.Sprintf("pod %s/%s for NAD %s", pod.Namespace, pod.Name, nadName) klog.Infof("Adding %s on DPU", podDesc) - podInterfaceInfo, err := cni.PodAnnotation2PodInfo(pod.Annotations, nil, isOvnUpEnabled, + podInterfaceInfo, err := cni.PodAnnotation2PodInfo(pod.Annotations, nil, string(pod.UID), "", nadName, netName, config.Default.MTU) if err != nil { return fmt.Errorf("failed to get pod interface information of %s: %v. retrying", podDesc, err) @@ -134,11 +133,10 @@ func (bnnc *BaseNodeNetworkController) watchPodsDPU() (*factory.Handler, error) nadToDPUCDMap = map[string]*util.DPUConnectionDetails{types.DefaultNetworkName: nil} } - isOvnUpEnabled := atomic.LoadInt32(&bnnc.atomicOvnUpEnabled) > 0 for nadName := range nadToDPUCDMap { dpuCD := bnnc.podReadyToAddDPU(pod, nadName) if dpuCD != nil { - err := bnnc.addDPUPodForNAD(pod, dpuCD, isOvnUpEnabled, netName, nadName, clientSet) + err := bnnc.addDPUPodForNAD(pod, dpuCD, netName, nadName, clientSet) if err != nil { klog.Errorf(err.Error()) } else { @@ -177,11 +175,10 @@ func (bnnc *BaseNodeNetworkController) watchPodsDPU() (*factory.Handler, error) nadToDPUCDMap[nadName] = nil } if newDPUCD != nil { - isOvnUpEnabled := atomic.LoadInt32(&bnnc.atomicOvnUpEnabled) > 0 klog.Infof("Adding VF during update because either during Pod Add we failed to add VF or "+ "connection details weren't present or the VF ID has changed. 
Old connection details (%v), "+ "New connection details (%v)", oldDPUCD, newDPUCD) - err := bnnc.addDPUPodForNAD(newPod, newDPUCD, isOvnUpEnabled, netName, nadName, clientSet) + err := bnnc.addDPUPodForNAD(newPod, newDPUCD, netName, nadName, clientSet) if err != nil { klog.Errorf(err.Error()) } else { diff --git a/go-controller/pkg/node/base_node_network_controller_dpu_test.go b/go-controller/pkg/node/base_node_network_controller_dpu_test.go index 05634cb6f5..e20e57043e 100644 --- a/go-controller/pkg/node/base_node_network_controller_dpu_test.go +++ b/go-controller/pkg/node/base_node_network_controller_dpu_test.go @@ -106,7 +106,7 @@ var _ = Describe("Node DPU tests", func() { kubeMock = kubemocks.Interface{} factoryMock = factorymocks.NodeWatchFactory{} - cnnci := newCommonNodeNetworkControllerInfo(nil, &kubeMock, &factoryMock, nil, "", false) + cnnci := newCommonNodeNetworkControllerInfo(nil, &kubeMock, &factoryMock, nil, "") dnnc = newDefaultNodeNetworkController(cnnci, nil, nil) podNamespaceLister = v1mocks.PodNamespaceLister{} diff --git a/go-controller/pkg/node/default_node_network_controller.go b/go-controller/pkg/node/default_node_network_controller.go index f9dd1b0fd9..77f5c89463 100644 --- a/go-controller/pkg/node/default_node_network_controller.go +++ b/go-controller/pkg/node/default_node_network_controller.go @@ -10,7 +10,6 @@ import ( "strconv" "strings" "sync" - "sync/atomic" "time" kapi "k8s.io/api/core/v1" @@ -46,9 +45,6 @@ type CommonNodeNetworkControllerInfo struct { watchFactory factory.NodeWatchFactory recorder record.EventRecorder name string - - // atomic integer value to indicate if PortBinding.up is supported - atomicOvnUpEnabled int32 } // BaseNodeNetworkController structure per-network fields and network specific configuration @@ -71,27 +67,21 @@ type BaseNodeNetworkController struct { } func newCommonNodeNetworkControllerInfo(kubeClient clientset.Interface, kube kube.Interface, - wf factory.NodeWatchFactory, eventRecorder record.EventRecorder, name string, - isOvnUpEnabled bool) *CommonNodeNetworkControllerInfo { - var atomicOvnUpEnabled int32 - if isOvnUpEnabled { - atomicOvnUpEnabled = 1 - } + wf factory.NodeWatchFactory, eventRecorder record.EventRecorder, name string) *CommonNodeNetworkControllerInfo { return &CommonNodeNetworkControllerInfo{ - client: kubeClient, - Kube: kube, - watchFactory: wf, - name: name, - recorder: eventRecorder, - atomicOvnUpEnabled: atomicOvnUpEnabled, + client: kubeClient, + Kube: kube, + watchFactory: wf, + name: name, + recorder: eventRecorder, } } // NewCommonNodeNetworkControllerInfo creates and returns the base node network controller info func NewCommonNodeNetworkControllerInfo(kubeClient clientset.Interface, wf factory.NodeWatchFactory, - eventRecorder record.EventRecorder, name string, isOvnUpEnabled bool) *CommonNodeNetworkControllerInfo { - return newCommonNodeNetworkControllerInfo(kubeClient, &kube.Kube{KClient: kubeClient}, wf, eventRecorder, name, isOvnUpEnabled) + eventRecorder record.EventRecorder, name string) *CommonNodeNetworkControllerInfo { + return newCommonNodeNetworkControllerInfo(kubeClient, &kube.Kube{KClient: kubeClient}, wf, eventRecorder, name) } // DefaultNodeNetworkController is the object holder for utilities meant for node management of default network @@ -689,13 +679,12 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error { klog.Infof("Node %s ready for ovn initialization with subnet %s", nc.name, util.JoinIPNets(subnets, ",")) // Create CNI Server - isOvnUpEnabled := 
atomic.LoadInt32(&nc.atomicOvnUpEnabled) > 0 if config.OvnKubeNode.Mode != types.NodeModeDPU { kclient, ok := nc.Kube.(*kube.Kube) if !ok { return fmt.Errorf("cannot get kubeclient for starting CNI server") } - cniServer, err = cni.NewCNIServer(isOvnUpEnabled, nc.watchFactory, kclient.KClient) + cniServer, err = cni.NewCNIServer(nc.watchFactory, kclient.KClient) if err != nil { return err } @@ -832,20 +821,6 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error { } } } - - // ensure CNI support for port binding built into OVN, as masters have been upgraded - if initialTopoVersion < types.OvnPortBindingTopoVersion && !isOvnUpEnabled && !config.OvnKubeNode.DisableOVNIfaceIdVer { - isOvnUpEnabled, err := util.GetOVNIfUpCheckMode() - if err != nil { - klog.Errorf("%v", err) - } else if isOvnUpEnabled { - klog.Infof("Detected support for port binding with external IDs") - if cniServer != nil { - cniServer.EnableOVNPortUpSupport() - } - atomic.StoreInt32(&nc.atomicOvnUpEnabled, 1) - } - } }() } diff --git a/go-controller/pkg/node/gateway_init_linux_test.go b/go-controller/pkg/node/gateway_init_linux_test.go index 0c35898eeb..6366208cd3 100644 --- a/go-controller/pkg/node/gateway_init_linux_test.go +++ b/go-controller/pkg/node/gateway_init_linux_test.go @@ -729,7 +729,7 @@ func shareGatewayInterfaceDPUHostTest(app *cli.App, testNS ns.NetNS, uplinkName, err = wf.Start() Expect(err).NotTo(HaveOccurred()) - cnnci := NewCommonNodeNetworkControllerInfo(nil, wf, nil, nodeName, false) + cnnci := NewCommonNodeNetworkControllerInfo(nil, wf, nil, nodeName) nc := newDefaultNodeNetworkController(cnnci, stop, wg) // must run route manager manually which is usually started with nc.Start() wg.Add(1) diff --git a/go-controller/pkg/node/ovn_test.go b/go-controller/pkg/node/ovn_test.go index 1b94c0674a..0f82c1d7fd 100644 --- a/go-controller/pkg/node/ovn_test.go +++ b/go-controller/pkg/node/ovn_test.go @@ -79,7 +79,7 @@ func (o *FakeOVNNode) init() { o.watcher, err = factory.NewNodeWatchFactory(o.fakeClient, fakeNodeName) Expect(err).NotTo(HaveOccurred()) - cnnci := NewCommonNodeNetworkControllerInfo(o.fakeClient.KubeClient, o.watcher, o.recorder, fakeNodeName, false) + cnnci := NewCommonNodeNetworkControllerInfo(o.fakeClient.KubeClient, o.watcher, o.recorder, fakeNodeName) o.nc = newDefaultNodeNetworkController(cnnci, o.stopChan, o.wg) // watcher is started by nodeNetworkControllerManager, not by nodeNetworkcontroller, so start it here. o.watcher.Start() diff --git a/go-controller/pkg/util/ovn.go b/go-controller/pkg/util/ovn.go index c760832685..9b717dd551 100644 --- a/go-controller/pkg/util/ovn.go +++ b/go-controller/pkg/util/ovn.go @@ -6,7 +6,6 @@ package util import ( "fmt" "net" - "strings" ocpconfigapi "github.com/openshift/api/config/v1" libovsdbclient "github.com/ovn-org/libovsdb/client" @@ -47,21 +46,3 @@ func PlatformTypeIsEgressIPCloudProvider() bool { config.Kubernetes.PlatformType == string(ocpconfigapi.AzurePlatformType) || config.Kubernetes.PlatformType == string(ocpconfigapi.OpenStackPlatformType) } - -// GetOVNIfUpCheckMode returns true if OVN supports Port_Binding.up -// -// Starting with v21.03.0 OVN sets OVS.Interface.external-id:ovn-installed -// and OVNSB.Port_Binding.up when all OVS flows associated to a -// logical port have been successfully programmed. 
-// OVS.Interface.external-id:ovn-installed can only be used correctly -// in a combination with OVS.Interface.external-id:iface-id-ver -func GetOVNIfUpCheckMode() (bool, error) { - if _, stderr, err := RunOVNSbctl("--columns=up", "list", "Port_Binding"); err != nil { - if strings.Contains(stderr, "does not contain a column") { - return false, nil - } - return false, fmt.Errorf("failed to check if port_binding is supported in OVN, stderr: %q, error: %v", - stderr, err) - } - return true, nil -} From c8e377cb0fcadb34c7cf2dd3005c05a126d351fc Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 19 May 2023 11:44:04 -0500 Subject: [PATCH 57/90] workflows: set read-only permissions where possible Most of our workflows require zero write access to the repo. Signed-off-by: Dan Williams --- .github/workflows/docker.yml | 3 +++ .github/workflows/test.yml | 3 +++ .github/workflows/test_periodic.yml | 3 +++ 3 files changed, 9 insertions(+) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index aa3d1600d9..602c36157f 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -4,6 +4,9 @@ on: push: branches: [ master ] +permissions: + contents: read + env: GO_VERSION: 1.18.4 REGISTRY: ghcr.io diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e5634d1ed7..ba83051518 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,6 +7,9 @@ on: schedule: - cron: '0 */12 * * *' +permissions: + contents: read + env: GO_VERSION: "1.18.4" K8S_VERSION: v1.24.0 diff --git a/.github/workflows/test_periodic.yml b/.github/workflows/test_periodic.yml index 46db7f41a2..962a75c03d 100644 --- a/.github/workflows/test_periodic.yml +++ b/.github/workflows/test_periodic.yml @@ -5,6 +5,9 @@ on: - cron: '0 6 * * *' workflow_dispatch: +permissions: + contents: read + env: GO_VERSION: "1.18.4" K8S_VERSION: v1.24.0 From 44c3c3026cbcc1f47d5042160bbae05e4c53cc2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Fri, 19 May 2023 16:08:09 +0000 Subject: [PATCH 58/90] Fix localnet set as layer2 in netinfo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jaime CaamaƱo Ruiz --- go-controller/pkg/util/multi_network.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/go-controller/pkg/util/multi_network.go b/go-controller/pkg/util/multi_network.go index 83f9b37036..7db9b06486 100644 --- a/go-controller/pkg/util/multi_network.go +++ b/go-controller/pkg/util/multi_network.go @@ -272,14 +272,14 @@ func newLayer2NetConfInfo(netconf *ovncnitypes.NetConf) (NetInfo, error) { } func newLocalnetNetConfInfo(netconf *ovncnitypes.NetConf) (NetInfo, error) { - subnets, excludes, err := parseSubnets(netconf.Subnets, netconf.ExcludeSubnets, types.Layer2Topology) + subnets, excludes, err := parseSubnets(netconf.Subnets, netconf.ExcludeSubnets, types.LocalnetTopology) if err != nil { return nil, fmt.Errorf("invalid %s netconf %s: %v", netconf.Topology, netconf.Name, err) } ni := &secondaryNetInfo{ netName: netconf.Name, - topology: types.Layer2Topology, + topology: types.LocalnetTopology, subnets: subnets, excludeSubnets: excludes, mtu: netconf.MTU, From ff8be944d1b9dd6df7ba629d0119e7542e4493b9 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Wed, 5 Apr 2023 14:12:34 +0200 Subject: [PATCH 59/90] OVS Daemon CPU Pinning Make ovnkube-node manage the CPU affinity of ovs-vswitchd and ovsdb-server processes, aligning them to its own CPU affinity. 
The feature is enabled when the file `/etc/openvswitch/enable_dynamic_cpu_affinity` is present and not empty on the filesystem. Refactor `pkg/util/ovs.go` to expose `GetOvsVSwitchdPID()` and `GetOvsDBServerPID()`. Signed-off-by: Andrea Panattoni --- .../node/default_node_network_controller.go | 7 + .../pkg/node/ovspinning/ovspinning_linux.go | 191 ++++++++++++++++++ .../node/ovspinning/ovspinning_linux_test.go | 187 +++++++++++++++++ .../pkg/node/ovspinning/ovspinning_noop.go | 12 ++ go-controller/pkg/util/ovs.go | 27 ++- 5 files changed, 421 insertions(+), 3 deletions(-) create mode 100644 go-controller/pkg/node/ovspinning/ovspinning_linux.go create mode 100644 go-controller/pkg/node/ovspinning/ovspinning_linux_test.go create mode 100644 go-controller/pkg/node/ovspinning/ovspinning_noop.go diff --git a/go-controller/pkg/node/default_node_network_controller.go b/go-controller/pkg/node/default_node_network_controller.go index 77f5c89463..04bd6f1e69 100644 --- a/go-controller/pkg/node/default_node_network_controller.go +++ b/go-controller/pkg/node/default_node_network_controller.go @@ -31,6 +31,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/controllers/egressservice" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/controllers/upgrade" nodeipt "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/iptables" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/ovspinning" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/healthcheck" retry "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/retry" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" @@ -917,6 +918,12 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error { }() } + nc.wg.Add(1) + go func() { + defer nc.wg.Done() + ovspinning.Run(nc.stopChan) + }() + klog.Infof("Default node network controller initialized and ready.") return nil } diff --git a/go-controller/pkg/node/ovspinning/ovspinning_linux.go b/go-controller/pkg/node/ovspinning/ovspinning_linux.go new file mode 100644 index 0000000000..3ac12d40ea --- /dev/null +++ b/go-controller/pkg/node/ovspinning/ovspinning_linux.go @@ -0,0 +1,191 @@ +//go:build linux +// +build linux + +package ovspinning + +import ( + "fmt" + "os" + "strconv" + "time" + + "github.com/fsnotify/fsnotify" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + "golang.org/x/sys/unix" + "k8s.io/klog/v2" +) + +// These variables are meant to be used in unit tests +var tickDuration time.Duration = 10 * time.Second +var getOvsVSwitchdPIDFn func() (string, error) = util.GetOvsVSwitchdPID +var getOvsDBServerPIDFn func() (string, error) = util.GetOvsDBServerPID +var featureEnablerFile string = "/etc/openvswitch/enable_dynamic_cpu_affinity" + +// Run monitors OVS daemon's processes (ovs-vswitchd and ovsdb-server) and sets their CPU affinity +// masks to that of the current process. 
+// This feature is enabled by the presence of a non-empty file in the path `/etc/openvswitch/enable_dynamic_cpu_affinity` +func Run(stopCh <-chan struct{}) { + + // The file must be present at startup to enable the feature + isFeatureEnabled, err := isFileNotEmpty(featureEnablerFile) + if err != nil { + klog.Warningf("Can't start OVS CPU affinity pinning: %v", err) + return + } + + if !isFeatureEnabled { + klog.Info("OVS CPU affinity pinning disabled") + return + } + + klog.Infof("Starting OVS daemon CPU pinning") + defer klog.Infof("Stopping OVS daemon CPU pinning") + + var fsnotifyEvents chan fsnotify.Event + var fsnotifyErrors chan error + fileWatcher, err := createFileWatcherFor(featureEnablerFile) + if err != nil { + klog.Warningf("Can't create a watcher for %s. Pinning will not stop by deleting it: %v", featureEnablerFile, err) + fsnotifyEvents = make(chan fsnotify.Event) + fsnotifyErrors = make(chan error) + } else { + fsnotifyEvents = fileWatcher.Events + fsnotifyErrors = fileWatcher.Errors + defer fileWatcher.Close() + } + + ticker := time.NewTicker(tickDuration) + defer ticker.Stop() + + for { + select { + case event, ok := <-fsnotifyEvents: + if !ok { + continue + } + + if event.Op.Has(fsnotify.Remove) { + klog.Infof("File [%s] has been removed. To re-enable the feature, restart ovnkube-node", featureEnablerFile) + return + } + + isFeatureEnabled, err = isFileNotEmpty(featureEnablerFile) + if err != nil { + klog.Warningf("Error while reading [%s]: %v", featureEnablerFile, err) + return + } + + if !isFeatureEnabled { + klog.Infof("File [%s] is empty or missing. To re-enable the feature, restart ovnkube-node", featureEnablerFile) + return + } + + case err, ok := <-fsnotifyErrors: + if ok { + klog.Errorf("Error watching for file [%s] changes: %s", featureEnablerFile, err) + } + + case <-stopCh: + return + + case <-ticker.C: + if !isFeatureEnabled { + continue + } + + err := setOvsVSwitchdCPUAffinity() + if err != nil { + klog.Warningf("Error while aligning ovs-vswitchd CPUs to current process: %v", err) + } + + err = setOvsDBServerCPUAffinity() + if err != nil { + klog.Warningf("Error while aligning ovsdb-server CPUs to current process: %v", err) + } + } + } +} + +func createFileWatcherFor(filename string) (*fsnotify.Watcher, error) { + fileWatcher, err := fsnotify.NewWatcher() + if err != nil { + return nil, fmt.Errorf("failed to create filesystem watcher: %w", err) + } + + err = fileWatcher.Add(filename) + if err != nil { + return nil, fmt.Errorf("unable to watch [%s] file: %w", filename, err) + } + + return fileWatcher, nil +} + +func isFileNotEmpty(filename string) (bool, error) { + f, err := os.Stat(filename) + if err != nil { + if os.IsNotExist(err) { + return false, nil + } + return false, fmt.Errorf("can't get file information [%s]: %w", filename, err) + } + + // get the size + return f.Size() > 0, nil +} + +func setOvsVSwitchdCPUAffinity() error { + + ovsVSwitchdPID, err := getOvsVSwitchdPIDFn() + if err != nil { + return fmt.Errorf("can't retrieve ovs-vswitchd PID: %w", err) + } + + klog.V(5).Info("Managing ovs-vswitchd[%s] daemon CPU affinity", ovsVSwitchdPID) + return setProcessCPUAffinity(ovsVSwitchdPID) +} + +func setOvsDBServerCPUAffinity() error { + + ovsDBserverPID, err := getOvsDBServerPIDFn() + if err != nil { + return fmt.Errorf("can't retrieve ovsdb-server PID: %w", err) + } + + klog.V(5).Infof("Managing ovsdb-server[%s] daemon CPU affinity", ovsDBserverPID) + return setProcessCPUAffinity(ovsDBserverPID) +} + +// setProcessCPUAffinity sets the CPU affinity of 
the given process to the same affinity as the current process +func setProcessCPUAffinity(targetPIDStr string) error { + + targetPID, err := strconv.Atoi(targetPIDStr) + if err != nil { + return fmt.Errorf("can't convert PID[%s] to integer: %w", targetPIDStr, err) + } + + var currentProcessCPUs unix.CPUSet + err = unix.SchedGetaffinity(os.Getpid(), ¤tProcessCPUs) + if err != nil { + return fmt.Errorf("can't get own CPU affinity") + } + + var targetProcessCPUs unix.CPUSet + err = unix.SchedGetaffinity(targetPID, &targetProcessCPUs) + if err != nil { + return fmt.Errorf("can't get process (PID:%d) CPU affinity: %w", targetPID, err) + } + + if currentProcessCPUs == targetProcessCPUs { + klog.V(5).Info("Process[%d] CPU affinity already match current process's affinity %x", targetPID, currentProcessCPUs) + return nil + } + + klog.Infof("Setting CPU affinity of PID(%d) to %x, was %x", targetPID, currentProcessCPUs, targetProcessCPUs) + + err = unix.SchedSetaffinity(targetPID, ¤tProcessCPUs) + if err != nil { + return fmt.Errorf("can't set CPU affinity of PID(%d) to %x: %w", targetPID, currentProcessCPUs, err) + } + + return nil +} diff --git a/go-controller/pkg/node/ovspinning/ovspinning_linux_test.go b/go-controller/pkg/node/ovspinning/ovspinning_linux_test.go new file mode 100644 index 0000000000..a47ffc36e4 --- /dev/null +++ b/go-controller/pkg/node/ovspinning/ovspinning_linux_test.go @@ -0,0 +1,187 @@ +//go:build linux +// +build linux + +package ovspinning + +import ( + "context" + "fmt" + "io/ioutil" + "os" + "os/exec" + "runtime" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "golang.org/x/sys/unix" + "k8s.io/klog/v2" +) + +func TestAlignCPUAffinity(t *testing.T) { + + ovsDBPid, ovsDBStop := mockOvsdbProcess(t) + defer ovsDBStop() + + ovsVSwitchdPid, ovsVSwitchdStop := mockOvsVSwitchdProcess(t) + defer ovsVSwitchdStop() + + defer setTickDuration(20 * time.Millisecond)() + defer mockFeatureEnableFile(t, "1")() + + var wg sync.WaitGroup + stopCh := make(chan struct{}) + defer func() { + close(stopCh) + wg.Wait() + }() + + wg.Add(1) + go func() { + // Be sure the system under test goroutine is finished before cleaning + defer wg.Done() + Run(stopCh) + }() + + var initialCPUset unix.CPUSet + err := unix.SchedGetaffinity(os.Getpid(), &initialCPUset) + assert.NoError(t, err) + + defer func() { + // Restore any previous CPU affinity value it was in place before the test + err = unix.SchedSetaffinity(os.Getpid(), &initialCPUset) + assert.NoError(t, err) + }() + + assert.Greater(t, runtime.NumCPU(), 1) + + for i := 0; i < runtime.NumCPU(); i++ { + var tmpCPUset unix.CPUSet + tmpCPUset.Set(i) + err = unix.SchedSetaffinity(os.Getpid(), &tmpCPUset) + assert.NoError(t, err) + + klog.Infof("Test CPU Affinity %x", tmpCPUset) + + assertPIDHasSchedAffinity(t, ovsVSwitchdPid, tmpCPUset) + assertPIDHasSchedAffinity(t, ovsDBPid, tmpCPUset) + } + + // Disable the feature by making the enabler file empty + ioutil.WriteFile(featureEnablerFile, []byte(""), 0) + assert.NoError(t, err) + + var tmpCPUset unix.CPUSet + tmpCPUset.Set(0) + err = unix.SchedSetaffinity(os.Getpid(), &tmpCPUset) + assert.NoError(t, err) + + assertNeverPIDHasSchedAffinity(t, ovsVSwitchdPid, tmpCPUset) + assertNeverPIDHasSchedAffinity(t, ovsDBPid, tmpCPUset) +} + +func TestIsFileNotEmpty(t *testing.T) { + + defer mockFeatureEnableFile(t, "")() + + result, err := isFileNotEmpty(featureEnablerFile) + assert.NoError(t, err) + assert.False(t, result) + + ioutil.WriteFile(featureEnablerFile, []byte("1"), 0) + result, 
err = isFileNotEmpty(featureEnablerFile) + assert.NoError(t, err) + assert.True(t, result) + + os.Remove(featureEnablerFile) + result, err = isFileNotEmpty(featureEnablerFile) + assert.NoError(t, err) + assert.False(t, result) +} + +func mockOvsdbProcess(t *testing.T) (int, func()) { + ctx, stopCmd := context.WithCancel(context.Background()) + defer stopCmd() + + cmd := exec.CommandContext(ctx, "sleep", "10") + + err := cmd.Start() + assert.NoError(t, err) + + previousGetter := getOvsDBServerPIDFn + getOvsDBServerPIDFn = func() (string, error) { + return fmt.Sprintf("%d", cmd.Process.Pid), nil + } + + return cmd.Process.Pid, func() { + stopCmd() + getOvsDBServerPIDFn = previousGetter + } +} + +func mockOvsVSwitchdProcess(t *testing.T) (int, func()) { + ctx, stopCmd := context.WithCancel(context.Background()) + defer stopCmd() + + cmd := exec.CommandContext(ctx, "sleep", "10") + + err := cmd.Start() + assert.NoError(t, err) + + previousGetter := getOvsVSwitchdPIDFn + getOvsVSwitchdPIDFn = func() (string, error) { + return fmt.Sprintf("%d", cmd.Process.Pid), nil + } + + return cmd.Process.Pid, func() { + stopCmd() + getOvsVSwitchdPIDFn = previousGetter + } +} + +func setTickDuration(d time.Duration) func() { + previousValue := tickDuration + tickDuration = d + + return func() { + tickDuration = previousValue + } +} + +func mockFeatureEnableFile(t *testing.T, data string) func() { + + f, err := ioutil.TempFile("", "enable_dynamic_cpu_affinity") + assert.NoError(t, err) + + previousValue := featureEnablerFile + featureEnablerFile = f.Name() + + ioutil.WriteFile(featureEnablerFile, []byte(data), 0) + assert.NoError(t, err) + + return func() { + featureEnablerFile = previousValue + os.Remove(f.Name()) + } +} + +func assertPIDHasSchedAffinity(t *testing.T, pid int, expectedCPUSet unix.CPUSet) { + var actual unix.CPUSet + assert.Eventually(t, func() bool { + err := unix.SchedGetaffinity(pid, &actual) + assert.NoError(t, err) + + return actual == expectedCPUSet + }, time.Second, 10*time.Millisecond, "pid[%d] Expected CPUSet %0x != Actual CPUSet %0x", pid, expectedCPUSet, actual) +} + +func assertNeverPIDHasSchedAffinity(t *testing.T, pid int, targetCPUSet unix.CPUSet) { + var actual unix.CPUSet + assert.Never(t, func() bool { + err := unix.SchedGetaffinity(pid, &actual) + assert.NoError(t, err) + + return actual == targetCPUSet + }, time.Second, 10*time.Millisecond, "pid[%d] == Actual CPUSet %0x expected to be different than %0x", pid, actual, targetCPUSet) +} diff --git a/go-controller/pkg/node/ovspinning/ovspinning_noop.go b/go-controller/pkg/node/ovspinning/ovspinning_noop.go new file mode 100644 index 0000000000..3e668c6fb0 --- /dev/null +++ b/go-controller/pkg/node/ovspinning/ovspinning_noop.go @@ -0,0 +1,12 @@ +//go:build !linux +// +build !linux + +package ovspinning + +import ( + "k8s.io/klog/v2" +) + +func Run(_ <-chan struct{}) { + klog.Infof("OVS CPU pinning is supported on linux platform only") +} diff --git a/go-controller/pkg/util/ovs.go b/go-controller/pkg/util/ovs.go index 5a7d6954e4..7e473d0876 100644 --- a/go-controller/pkg/util/ovs.go +++ b/go-controller/pkg/util/ovs.go @@ -605,19 +605,40 @@ func RunOVNControllerAppCtl(args ...string) (string, string, error) { // RunOvsVswitchdAppCtl runs an 'ovs-appctl -t /var/run/openvsiwthc/ovs-vswitchd.pid.ctl command' func RunOvsVswitchdAppCtl(args ...string) (string, string, error) { var cmdArgs []string - pid, err := afero.ReadFile(AppFs, savedOVSRunDir+"ovs-vswitchd.pid") + pid, err := GetOvsVSwitchdPID() if err != nil { - return "", "", 
fmt.Errorf("failed to get ovs-vswitch pid : %v", err)
+		return "", "", err
 	}
+
 	cmdArgs = []string{
 		"-t",
-		savedOVSRunDir + fmt.Sprintf("ovs-vswitchd.%s.ctl", strings.TrimSpace(string(pid))),
+		savedOVSRunDir + fmt.Sprintf("ovs-vswitchd.%s.ctl", pid),
 	}
 	cmdArgs = append(cmdArgs, args...)
 	stdout, stderr, err := runOVNretry(runner.appctlPath, nil, cmdArgs...)
 	return strings.Trim(strings.TrimSpace(stdout.String()), "\""), stderr.String(), err
 }
 
+// GetOvsVSwitchdPID retrieves the Process IDentifier for the ovs-vswitchd daemon.
+func GetOvsVSwitchdPID() (string, error) {
+	pid, err := afero.ReadFile(AppFs, savedOVSRunDir+"ovs-vswitchd.pid")
+	if err != nil {
+		return "", fmt.Errorf("failed to get ovs-vswitchd pid: %v", err)
+	}
+
+	return strings.TrimSpace(string(pid)), nil
+}
+
+// GetOvsDBServerPID retrieves the Process IDentifier for the ovsdb-server daemon.
+func GetOvsDBServerPID() (string, error) {
+	pid, err := afero.ReadFile(AppFs, savedOVSRunDir+"ovsdb-server.pid")
+	if err != nil {
+		return "", fmt.Errorf("failed to get ovsdb-server pid: %v", err)
+	}
+
+	return strings.TrimSpace(string(pid)), nil
+}
+
 // RunIP runs a command via the iproute2 "ip" utility
 func RunIP(args ...string) (string, string, error) {
 	stdout, stderr, err := run(runner.ipPath, args...)

From 15ea97e8794a231259abba4c896e73ca113ce6e4 Mon Sep 17 00:00:00 2001
From: Andrea Panattoni
Date: Wed, 17 May 2023 17:28:58 +0200
Subject: [PATCH 60/90] Add smoke tests for OVS CPU affinity pinning

Add a kind-based test to verify that the feature can be enabled by
creating the `enable_dynamic_cpu_affinity` file on the host.

Fix the ovnkube-node DaemonSet YAML definition to mount `/etc/openvswitch`
from the host.

Signed-off-by: Andrea Panattoni
---
 dist/templates/ovnkube-node.yaml.j2 |  5 ++-
 test/e2e/e2e.go                     | 52 +++++++++++++++++++++++++++++
 test/e2e/ovspinning.go              | 31 +++++++++++++++++
 3 files changed, 87 insertions(+), 1 deletion(-)
 create mode 100644 test/e2e/ovspinning.go

diff --git a/dist/templates/ovnkube-node.yaml.j2 b/dist/templates/ovnkube-node.yaml.j2
index 660d883c7a..e66701c070 100644
--- a/dist/templates/ovnkube-node.yaml.j2
+++ b/dist/templates/ovnkube-node.yaml.j2
@@ -85,7 +85,7 @@ spec:
             name: host-ovn-cert
             readOnly: true
           - mountPath: /etc/openvswitch/
-            name: host-var-lib-ovs
+            name: host-etc-ovs
             readOnly: true
          - mountPath: /etc/ovn/
            name: host-var-lib-ovs
@@ -381,6 +381,9 @@ spec:
       - name: host-var-lib-ovs
        hostPath:
          path: /var/lib/openvswitch
+      - name: host-etc-ovs
+        hostPath:
+          path: /etc/openvswitch
 {%- elif ovnkube_app_name=="ovnkube-node-dpu-host" %}
       - name: var-run-ovn
         emptyDir: {}
diff --git a/test/e2e/e2e.go b/test/e2e/e2e.go
index b0aec93999..3d0ac0e39b 100644
--- a/test/e2e/e2e.go
+++ b/test/e2e/e2e.go
@@ -1,6 +1,7 @@
 package e2e
 
 import (
+	"bufio"
 	"context"
 	"encoding/json"
 	"fmt"
@@ -11,6 +12,7 @@ import (
 	"os"
 	"os/exec"
 	"path"
+	"regexp"
 	"strconv"
 	"strings"
 	"sync"
@@ -24,6 +26,7 @@ import (
 	"github.com/pkg/errors"
 	appsv1 "k8s.io/api/apps/v1"
 	v1 "k8s.io/api/core/v1"
+	utilerrors "k8s.io/apimachinery/pkg/util/errors"
 	"k8s.io/apimachinery/pkg/util/intstr"
 	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/apimachinery/pkg/util/wait"
@@ -485,6 +488,55 @@ func restartOVNKubeNodePod(clientset kubernetes.Interface, namespace string, nod
 	return nil
 }
 
+// restartOVNKubeNodePodsInParallel restarts multiple ovnkube-node pods in parallel.
See `restartOVNKubeNodePod` +func restartOVNKubeNodePodsInParallel(clientset kubernetes.Interface, namespace string, nodeNames ...string) error { + framework.Logf("restarting ovnkube-node for %v", nodeNames) + + restartFuncs := make([]func() error, 0, len(nodeNames)) + for _, n := range nodeNames { + nodeName := n + restartFuncs = append(restartFuncs, func() error { + return restartOVNKubeNodePod(clientset, ovnNamespace, nodeName) + }) + } + + return utilerrors.AggregateGoroutines(restartFuncs...) +} + +// getOVNKubePodLogsFiltered retrieves logs from ovnkube-node pods and filters logs lines according to filteringRegexp +func getOVNKubePodLogsFiltered(clientset kubernetes.Interface, namespace, nodeName, filteringRegexp string) (string, error) { + ovnKubeNodePods, err := clientset.CoreV1().Pods(ovnNamespace).List(context.Background(), metav1.ListOptions{ + LabelSelector: "name=ovnkube-node", + FieldSelector: "spec.nodeName=" + nodeName, + }) + if err != nil { + return "", fmt.Errorf("getOVNKubePodLogsFiltered: error while getting ovnkube-node pods: %w", err) + } + + logs, err := e2epod.GetPodLogs(clientset, ovnNamespace, ovnKubeNodePods.Items[0].Name, "ovnkube-node") + if err != nil { + return "", fmt.Errorf("getOVNKubePodLogsFiltered: error while getting ovnkube-node [%s/%s] logs: %w", + ovnNamespace, ovnKubeNodePods.Items[0].Name, err) + } + + scanner := bufio.NewScanner(strings.NewReader(logs)) + filteredLogs := "" + re := regexp.MustCompile(filteringRegexp) + for scanner.Scan() { + line := scanner.Text() + if re.MatchString(line) { + filteredLogs += line + "\n" + } + } + + err = scanner.Err() + if err != nil { + return "", fmt.Errorf("getOVNKubePodLogsFiltered: error while scanning ovnkube-node logs: %w", err) + } + + return filteredLogs, nil +} + func findOvnKubeMasterNode() (string, error) { ovnkubeMasterNode, err := framework.RunKubectl(ovnNs, "get", "leases", "ovn-kubernetes-master-global", diff --git a/test/e2e/ovspinning.go b/test/e2e/ovspinning.go new file mode 100644 index 0000000000..cb3a29aa8a --- /dev/null +++ b/test/e2e/ovspinning.go @@ -0,0 +1,31 @@ +package e2e + +import ( + "github.com/onsi/ginkgo" + "github.com/onsi/gomega" +) + +var _ = ginkgo.Describe("OVS CPU affinity pinning", func() { + + f := newPrivelegedTestFramework("ovspinning") + + ginkgo.It("can be enabled on specific nodes by creating enable_dynamic_cpu_affinity file", func() { + + nodeWithEnabledOvsAffinityPinning := "ovn-worker2" + + _, err := runCommand(containerRuntime, "exec", nodeWithEnabledOvsAffinityPinning, "bash", "-c", "echo 1 > /etc/openvswitch/enable_dynamic_cpu_affinity") + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + restartOVNKubeNodePodsInParallel(f.ClientSet, ovnNamespace, "ovn-worker", "ovn-worker2") + + enabledNodeLogs, err := getOVNKubePodLogsFiltered(f.ClientSet, ovnNamespace, "ovn-worker2", ".*ovspinning_linux.go.*$") + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + gomega.Expect(enabledNodeLogs).To(gomega.ContainSubstring("Starting OVS daemon CPU pinning")) + + disabledNodeLogs, err := getOVNKubePodLogsFiltered(f.ClientSet, ovnNamespace, "ovn-worker", ".*ovspinning_linux.go.*$") + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Expect(disabledNodeLogs).To(gomega.ContainSubstring("OVS CPU affinity pinning disabled")) + }) + +}) From 6ace7850afa8cefb8eb2bf7417b72de49ca1a071 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Fri, 19 May 2023 17:13:56 +0200 Subject: [PATCH 61/90] Pretty print CPUSet Implement a function to make `unix.CPUSet` objects easier to read 
in logs.

Signed-off-by: Andrea Panattoni
---
 .../pkg/node/ovspinning/ovspinning_linux.go   | 54 +++++++++++++++++--
 .../node/ovspinning/ovspinning_linux_test.go  | 33 ++++++++++++
 2 files changed, 84 insertions(+), 3 deletions(-)

diff --git a/go-controller/pkg/node/ovspinning/ovspinning_linux.go b/go-controller/pkg/node/ovspinning/ovspinning_linux.go
index 3ac12d40ea..3964524eeb 100644
--- a/go-controller/pkg/node/ovspinning/ovspinning_linux.go
+++ b/go-controller/pkg/node/ovspinning/ovspinning_linux.go
@@ -4,9 +4,11 @@
 package ovspinning
 
 import (
+	"bytes"
 	"fmt"
 	"os"
 	"strconv"
+	"strings"
 	"time"
 
 	"github.com/fsnotify/fsnotify"
@@ -176,16 +178,62 @@ func setProcessCPUAffinity(targetPIDStr string) error {
 	}
 
 	if currentProcessCPUs == targetProcessCPUs {
-		klog.V(5).Infof("Process[%d] CPU affinity already matches current process's affinity %x", targetPID, currentProcessCPUs)
+		klog.V(5).Infof("Process[%d] CPU affinity already matches current process's affinity %s", targetPID, printCPUSet(currentProcessCPUs))
 		return nil
 	}
 
-	klog.Infof("Setting CPU affinity of PID(%d) to %x, was %x", targetPID, currentProcessCPUs, targetProcessCPUs)
+	klog.Infof("Setting CPU affinity of PID(%d) to %s, was %s", targetPID, printCPUSet(currentProcessCPUs), printCPUSet(targetProcessCPUs))
 
 	err = unix.SchedSetaffinity(targetPID, &currentProcessCPUs)
 	if err != nil {
-		return fmt.Errorf("can't set CPU affinity of PID(%d) to %x: %w", targetPID, currentProcessCPUs, err)
+		return fmt.Errorf("can't set CPU affinity of PID(%d) to %s: %w", targetPID, printCPUSet(currentProcessCPUs), err)
 	}
 
 	return nil
 }
+
+// printCPUSet takes a unix.CPUSet and returns a string representation in canonical Linux CPU list format.
+// e.g. 0-5,8,10,12-13
+//
+// See http://man7.org/linux/man-pages/man7/cpuset.7.html#FORMATS
+func printCPUSet(cpus unix.CPUSet) string {
+
+	type rng struct {
+		start int
+		end   int
+	}
+
+	// Start with a fake range to avoid going out of range while looping
+	ranges := []rng{{-2, -2}}
+
+	// There is no public API to know the length of unix.CPUSet, so this counter is the
+	// stopping condition for the loop
+	remainingSetsCpus := cpus.Count()
+
+	for i := 0; remainingSetsCpus > 0; i++ {
+		if !cpus.IsSet(i) {
+			continue
+		}
+
+		remainingSetsCpus--
+
+		lastRange := ranges[len(ranges)-1]
+		if lastRange.end == i-1 {
+			ranges[len(ranges)-1].end++
+		} else {
+			ranges = append(ranges, rng{start: i, end: i})
+		}
+	}
+
+	var result bytes.Buffer
+	// discard the fake range with [1:]
+	for _, r := range ranges[1:] {
+		if r.start == r.end {
+			result.WriteString(strconv.Itoa(r.start))
+		} else {
+			result.WriteString(fmt.Sprintf("%d-%d", r.start, r.end))
+		}
+		result.WriteString(",")
+	}
+	return strings.TrimRight(result.String(), ",")
+}
diff --git a/go-controller/pkg/node/ovspinning/ovspinning_linux_test.go b/go-controller/pkg/node/ovspinning/ovspinning_linux_test.go
index a47ffc36e4..b272fbef93 100644
--- a/go-controller/pkg/node/ovspinning/ovspinning_linux_test.go
+++ b/go-controller/pkg/node/ovspinning/ovspinning_linux_test.go
@@ -100,6 +100,39 @@ func TestIsFileNotEmpty(t *testing.T) {
 	assert.False(t, result)
 }
 
+func TestPrintCPUSetAll(t *testing.T) {
+	var x unix.CPUSet
+	for i := 0; i < 16; i++ {
+		x.Set(i)
+	}
+
+	assert.Equal(t,
+		"0-15",
+		printCPUSet(x),
+	)
+
+	assert.Equal(t,
+		"",
+		printCPUSet(unix.CPUSet{}),
+	)
+}
+
+func TestPrintCPUSetRanges(t *testing.T) {
+	var x unix.CPUSet
+
+	x.Set(2)
+	x.Set(3)
+	x.Set(6)
+	x.Set(7)
+	x.Set(8)
+	x.Set(14)
+
+	assert.Equal(t,
+		"2-3,6-8,14",
+		printCPUSet(x),
+	)
+}
+
 func
mockOvsdbProcess(t *testing.T) (int, func()) { ctx, stopCmd := context.WithCancel(context.Background()) defer stopCmd() From e22721177c614dc448684a8a958c1959888b73b7 Mon Sep 17 00:00:00 2001 From: Surya Seetharaman Date: Thu, 18 May 2023 14:20:14 +0200 Subject: [PATCH 62/90] Update OCPHACKs to use nodeipt pkg Due to the changes made in https://github.com/ovn-org/ovn-kubernetes/pull/3389 this is necessary Signed-off-by: Surya Seetharaman --- go-controller/pkg/node/OCP_HACKS.go | 32 ++++++++++++++--------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/go-controller/pkg/node/OCP_HACKS.go b/go-controller/pkg/node/OCP_HACKS.go index c287d3e97e..50bbe6813a 100644 --- a/go-controller/pkg/node/OCP_HACKS.go +++ b/go-controller/pkg/node/OCP_HACKS.go @@ -6,40 +6,40 @@ package node import ( "fmt" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" - "github.com/coreos/go-iptables/iptables" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + nodeipt "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/iptables" ) // Block MCS Access. https://github.com/openshift/ovn-kubernetes/pull/170 -func generateBlockMCSRules(rules *[]iptRule, protocol iptables.Protocol) { - var delRules []iptRule +func generateBlockMCSRules(rules *[]nodeipt.Rule, protocol iptables.Protocol) { + var delRules []nodeipt.Rule for _, chain := range []string{"FORWARD", "OUTPUT"} { for _, port := range []string{"22623", "22624"} { - *rules = append(*rules, iptRule{ - table: "filter", - chain: chain, - args: []string{"-p", "tcp", "-m", "tcp", "--dport", port, "--syn", "-j", "REJECT"}, - protocol: protocol, + *rules = append(*rules, nodeipt.Rule{ + Table: "filter", + Chain: chain, + Args: []string{"-p", "tcp", "-m", "tcp", "--dport", port, "--syn", "-j", "REJECT"}, + Protocol: protocol, }) // Delete the old "--syn"-less rules on upgrade - delRules = append(delRules, iptRule{ - table: "filter", - chain: chain, - args: []string{"-p", "tcp", "-m", "tcp", "--dport", port, "-j", "REJECT"}, - protocol: protocol, + delRules = append(delRules, nodeipt.Rule{ + Table: "filter", + Chain: chain, + Args: []string{"-p", "tcp", "-m", "tcp", "--dport", port, "-j", "REJECT"}, + Protocol: protocol, }) } } - _ = delIptRules(delRules) + _ = nodeipt.DelRules(delRules) } // insertMCSBlockIptRules inserts iptables rules to block local Machine Config Service // ports. 
See https://github.com/openshift/ovn-kubernetes/pull/170 func insertMCSBlockIptRules() error { - rules := []iptRule{} + rules := []nodeipt.Rule{} if config.IPv4Mode { generateBlockMCSRules(&rules, iptables.ProtocolIPv4) } From 25effdecf5206d15f1ff681c1fefb818f2622814 Mon Sep 17 00:00:00 2001 From: Surya Seetharaman Date: Mon, 20 Mar 2023 14:58:00 +0100 Subject: [PATCH 63/90] Add egressip feature enable flag to CM This commit passes the egressip_enabled_flag to cluster manager pod on control plane so that we can configure the feature from CM for IC Signed-off-by: Surya Seetharaman --- dist/images/ovnkube.sh | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/dist/images/ovnkube.sh b/dist/images/ovnkube.sh index 46c85f70a4..e56d4dbf88 100755 --- a/dist/images/ovnkube.sh +++ b/dist/images/ovnkube.sh @@ -1269,6 +1269,17 @@ ovn-cluster-manager() { echo "=============== ovn-cluster-manager (wait for ready_to_start_node) ========== MASTER ONLY" wait_for_event ready_to_start_node + egressip_enabled_flag= + if [[ ${ovn_egressip_enable} == "true" ]]; then + egressip_enabled_flag="--enable-egress-ip" + fi + + egressip_healthcheck_port_flag= + if [[ -n "${ovn_egress_ip_healthcheck_port}" ]]; then + egressip_healthcheck_port_flag="--egressip-node-healthcheck-port=${ovn_egress_ip_healthcheck_port}" + fi + echo "egressip_flags: ${egressip_enabled_flag}, ${egressip_healthcheck_port_flag}" + hybrid_overlay_flags= if [[ ${ovn_hybrid_overlay_enable} == "true" ]]; then hybrid_overlay_flags="--enable-hybrid-overlay" @@ -1329,6 +1340,8 @@ ovn-cluster-manager() { --logfile /var/log/ovn-kubernetes/ovnkube-cluster-manager.log \ ${ovnkube_metrics_tls_opts} \ ${multicast_enabled_flag} \ + ${egressip_enabled_flag} \ + ${egressip_healthcheck_port_flag} \ ${multi_network_enabled_flag} \ --metrics-bind-address ${ovnkube_cluster_manager_metrics_bind_address} \ --host-network-namespace ${ovn_host_network_namespace} & From 156184594f6987a7e01473589fbf90062170358d Mon Sep 17 00:00:00 2001 From: Surya Seetharaman Date: Sat, 25 Mar 2023 17:14:50 +0100 Subject: [PATCH 64/90] Add egressIP factory to cluster-manager This commit does three things: 1) It adds eipFactory to CMWatchFactory which is required for EIP 2) It adds EIPClient and CloudNetworkClient to CMClientset CM = cluster manager This plubming is the foundation to be able to move egressIP bits into CM. Signed-off-by: Surya Seetharaman --- go-controller/pkg/factory/factory.go | 23 ++++++++++++++++++++--- go-controller/pkg/util/kube.go | 4 ++++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/go-controller/pkg/factory/factory.go b/go-controller/pkg/factory/factory.go index ac8b3b039c..36264388c6 100644 --- a/go-controller/pkg/factory/factory.go +++ b/go-controller/pkg/factory/factory.go @@ -444,16 +444,33 @@ func NewNodeWatchFactory(ovnClientset *util.OVNNodeClientset, nodeName string) ( // mode process. 
func NewClusterManagerWatchFactory(ovnClientset *util.OVNClusterManagerClientset) (*WatchFactory, error) { wf := &WatchFactory{ - iFactory: informerfactory.NewSharedInformerFactory(ovnClientset.KubeClient, resyncInterval), - informers: make(map[reflect.Type]*informer), - stopChan: make(chan struct{}), + iFactory: informerfactory.NewSharedInformerFactory(ovnClientset.KubeClient, resyncInterval), + eipFactory: egressipinformerfactory.NewSharedInformerFactory(ovnClientset.EgressIPClient, resyncInterval), + cpipcFactory: ocpcloudnetworkinformerfactory.NewSharedInformerFactory(ovnClientset.CloudNetworkClient, resyncInterval), + informers: make(map[reflect.Type]*informer), + stopChan: make(chan struct{}), } + if err := egressipapi.AddToScheme(egressipscheme.Scheme); err != nil { + return nil, err + } var err error wf.informers[NodeType], err = newInformer(NodeType, wf.iFactory.Core().V1().Nodes().Informer()) if err != nil { return nil, err } + if config.OVNKubernetesFeature.EnableEgressIP { + wf.informers[EgressIPType], err = newInformer(EgressIPType, wf.eipFactory.K8s().V1().EgressIPs().Informer()) + if err != nil { + return nil, err + } + } + if util.PlatformTypeIsEgressIPCloudProvider() { + wf.informers[CloudPrivateIPConfigType], err = newInformer(CloudPrivateIPConfigType, wf.cpipcFactory.Cloud().V1().CloudPrivateIPConfigs().Informer()) + if err != nil { + return nil, err + } + } return wf, nil } diff --git a/go-controller/pkg/util/kube.go b/go-controller/pkg/util/kube.go index 511e968843..4423c799d1 100644 --- a/go-controller/pkg/util/kube.go +++ b/go-controller/pkg/util/kube.go @@ -68,6 +68,8 @@ type OVNNodeClientset struct { type OVNClusterManagerClientset struct { KubeClient kubernetes.Interface + EgressIPClient egressipclientset.Interface + CloudNetworkClient ocpcloudnetworkclientset.Interface NetworkAttchDefClient networkattchmentdefclientset.Interface } @@ -86,6 +88,8 @@ func (cs *OVNClientset) GetMasterClientset() *OVNMasterClientset { func (cs *OVNClientset) GetClusterManagerClientset() *OVNClusterManagerClientset { return &OVNClusterManagerClientset{ KubeClient: cs.KubeClient, + EgressIPClient: cs.EgressIPClient, + CloudNetworkClient: cs.CloudNetworkClient, NetworkAttchDefClient: cs.NetworkAttchDefClient, } } From 1959bb2895dbe1565947287b4f00b1ebe107975b Mon Sep 17 00:00:00 2001 From: Surya Seetharaman Date: Sat, 25 Mar 2023 17:15:37 +0100 Subject: [PATCH 65/90] Add utility DeleteLogicalRouterStaticRoutesWithPredicateOps This commit adds DeleteLogicalRouterStaticRoutesWithPredicateOps to libovsdbops package. Note that we already had DeleteLogicalRouterStaticRoutesWithPredicate and now we want to just have a util that returns the ops so that we can batch this when doing a deletion for EIP. NOTE: This commit only introduces the utility, its use comes down in future commit. 
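For illustration, a caller can batch the returned ops with other delete
operations and commit everything in a single transaction (rough sketch only;
nbClient, routerName and predicate are placeholders, not part of this change):

    ops := []libovsdb.Operation{}
    ops, err := libovsdbops.DeleteLogicalRouterStaticRoutesWithPredicateOps(nbClient, ops, routerName, predicate)
    if err != nil {
        return err
    }
    // ...append further ops for the same EgressIP teardown here...
    _, err = libovsdbops.TransactAndCheck(nbClient, ops)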
Signed-off-by: Surya Seetharaman --- go-controller/pkg/libovsdbops/router.go | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/go-controller/pkg/libovsdbops/router.go b/go-controller/pkg/libovsdbops/router.go index bc48f5508a..d34852e4f7 100644 --- a/go-controller/pkg/libovsdbops/router.go +++ b/go-controller/pkg/libovsdbops/router.go @@ -678,6 +678,20 @@ func CreateOrReplaceLogicalRouterStaticRouteWithPredicate(nbClient libovsdbclien // routes from the cache based on a given predicate, deletes them and removes // them from the provided logical router func DeleteLogicalRouterStaticRoutesWithPredicate(nbClient libovsdbclient.Client, routerName string, p logicalRouterStaticRoutePredicate) error { + var ops []libovsdb.Operation + var err error + ops, err = DeleteLogicalRouterStaticRoutesWithPredicateOps(nbClient, ops, routerName, p) + if err != nil { + return err + } + _, err = TransactAndCheck(nbClient, ops) + return err +} + +// DeleteLogicalRouterStaticRoutesWithPredicateOps looks up logical router static +// routes from the cache based on a given predicate, and returns the ops to delete +// them and remove them from the provided logical router +func DeleteLogicalRouterStaticRoutesWithPredicateOps(nbClient libovsdbclient.Client, ops []libovsdb.Operation, routerName string, p logicalRouterStaticRoutePredicate) ([]libovsdb.Operation, error) { router := &nbdb.LogicalRouter{ Name: routerName, } @@ -700,7 +714,7 @@ func DeleteLogicalRouterStaticRoutesWithPredicate(nbClient libovsdbclient.Client } m := newModelClient(nbClient) - return m.Delete(opModels...) + return m.DeleteOps(ops, opModels...) } // DeleteLogicalRouterStaticRoutes deletes the logical router static routes and From 1ef5a9c608d379b5474a237364a500c5c4b32ff9 Mon Sep 17 00:00:00 2001 From: Surya Seetharaman Date: Sat, 25 Mar 2023 17:20:02 +0100 Subject: [PATCH 66/90] Split egressip into global and zone controllers This commit does the following: Cluster Manager side: 1) We create a new egressIPController and call that from CM if the feature is enabled 2) We do only WatchEgressNodes, WatchEgressIP and WatchCloudPrivateIPConfig from CM (namespaces, pods will be taken care of by master) 3) We move all the eip healthcheck bits and node allocator bits to CM 4) Any updates to EIP CRD will always be done by CM; master side will only read the CRD 5) All bits for cloud private IP config was moved to CM from master. Master side: 1) We rename egressIPController to egressIPZoneController - this is responsible for doing NBDB configurations for everything happening in its zone and sometimes for pods in remote zones when necessary 2) We add a localZoneNodes similar to what we have in base which holds the list of local zone nodes 3) master will be responsible for adding snats, reroute policies and static routes for eip pods High level logic: - Do SNATs towards egressIP only if egressNode is local to the zone irrespective of whether the pod is local or remote - Do reroute policies towards transit switch only if pod is local to the zone. NOTE: In IC, nexthop is transit switchIP NOT joinIP - Do static routes towards joinIP only if pod is non-local to the zone but egressnode is local to the zone. NOTE: This is NEW, we never used static routes for EIP before. It is an IC only thing. 
- Do SNATs towards nodeIP only if pod is local to the zone - Duplicate the code for `IsReachableLegacy` and `IsReachableViaGRPC` functions which will now be in both CM and master - but this is temporary to ensure ESVC works in non-IC setup and until we do ESVC refactor for IC. Tests: The commit also fixes unit tests to run in non-ic and ic modes and splits the tests for global controller logic into cluster-manager while keeping the tests to check libovsdb objects in master itself. Signed-off-by: Surya Seetharaman --- .../pkg/clustermanager/clustermanager.go | 15 + .../pkg/clustermanager/egressip_controller.go | 1505 ++ .../egressip_controller_test.go | 2349 +++ .../clustermanager/egressip_event_handler.go | 263 + .../fake_cluster_manager_test.go | 65 + go-controller/pkg/factory/factory.go | 7 - go-controller/pkg/factory/factory_test.go | 15 +- go-controller/pkg/ovn/base_event_handler.go | 8 +- .../egress_services/egress_services_node.go | 85 + .../pkg/ovn/default_network_controller.go | 96 +- go-controller/pkg/ovn/egressip.go | 2300 +-- go-controller/pkg/ovn/egressip_test.go | 12157 +++++++--------- go-controller/pkg/ovn/ovn.go | 16 +- go-controller/pkg/ovn/ovn_test.go | 17 +- go-controller/pkg/syncmap/syncmap.go | 8 + go-controller/pkg/util/kube.go | 2 - test/e2e/egressip.go | 28 +- test/e2e/util.go | 5 + 18 files changed, 10309 insertions(+), 8632 deletions(-) create mode 100644 go-controller/pkg/clustermanager/egressip_controller.go create mode 100644 go-controller/pkg/clustermanager/egressip_controller_test.go create mode 100644 go-controller/pkg/clustermanager/egressip_event_handler.go create mode 100644 go-controller/pkg/clustermanager/fake_cluster_manager_test.go diff --git a/go-controller/pkg/clustermanager/clustermanager.go b/go-controller/pkg/clustermanager/clustermanager.go index 65a53e5e6d..a32abbed75 100644 --- a/go-controller/pkg/clustermanager/clustermanager.go +++ b/go-controller/pkg/clustermanager/clustermanager.go @@ -31,6 +31,9 @@ type ClusterManager struct { wf *factory.WatchFactory wg *sync.WaitGroup secondaryNetClusterManager *secondaryNetworkClusterManager + // Controller used for programming node allocation for egress IP + // The OVN DB setup is handled by egressIPZoneController that runs in ovnkube-controller + eIPC *egressIPClusterController // event recorder used to post events to k8s recorder record.EventRecorder @@ -66,6 +69,9 @@ func NewClusterManager(ovnClient *util.OVNClusterManagerClientset, wf *factory.W return nil, err } } + if config.OVNKubernetesFeature.EnableEgressIP { + cm.eIPC = newEgressIPController(ovnClient, wf, recorder) + } return cm, nil } @@ -93,6 +99,12 @@ func (cm *ClusterManager) Start(ctx context.Context) error { } } + if config.OVNKubernetesFeature.EnableEgressIP { + if err := cm.eIPC.Start(); err != nil { + return err + } + } + return nil } @@ -104,5 +116,8 @@ func (cm *ClusterManager) Stop() { if config.OVNKubernetesFeature.EnableMultiNetwork { cm.secondaryNetClusterManager.Stop() } + if config.OVNKubernetesFeature.EnableEgressIP { + cm.eIPC.Stop() + } metrics.UnregisterClusterManagerFunctional() } diff --git a/go-controller/pkg/clustermanager/egressip_controller.go b/go-controller/pkg/clustermanager/egressip_controller.go new file mode 100644 index 0000000000..f2d8d0587f --- /dev/null +++ b/go-controller/pkg/clustermanager/egressip_controller.go @@ -0,0 +1,1505 @@ +package clustermanager + +import ( + "context" + "encoding/hex" + "encoding/json" + "fmt" + "net" + "os" + "reflect" + "sort" + "strings" + "sync" + "syscall" + "time" + + 
ocpcloudnetworkapi "github.com/openshift/api/cloudnetwork/v1"
+	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config"
+	egressipv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1"
+	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory"
+	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube"
+	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics"
+	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/healthcheck"
+	objretry "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/retry"
+	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types"
+	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util"
+	v1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	utilerrors "k8s.io/apimachinery/pkg/util/errors"
+	"k8s.io/apimachinery/pkg/util/sets"
+	"k8s.io/client-go/tools/record"
+	"k8s.io/client-go/util/retry"
+	"k8s.io/klog/v2"
+	utilnet "k8s.io/utils/net"
+)
+
+const (
+	egressIPReachabilityCheckInterval = 5 * time.Second
+)
+
+type egressIPHealthcheckClientAllocator struct{}
+
+func (hccAlloc *egressIPHealthcheckClientAllocator) allocate(nodeName string) healthcheck.EgressIPHealthClient {
+	return healthcheck.NewEgressIPHealthClient(nodeName)
+}
+
+func isReachableViaGRPC(mgmtIPs []net.IP, healthClient healthcheck.EgressIPHealthClient, healthCheckPort, totalTimeout int) bool {
+	dialCtx, dialCancel := context.WithTimeout(context.Background(), time.Duration(totalTimeout)*time.Second)
+	defer dialCancel()
+
+	if !healthClient.IsConnected() {
+		// gRPC session is not up. Attempt to connect and if that succeeds, we will declare the node as reachable.
+		return healthClient.Connect(dialCtx, mgmtIPs, healthCheckPort)
+	}
+
+	// gRPC session is already established. Send a probe, which will succeed, or close the session.
+	return healthClient.Probe(dialCtx)
+}
+
+type egressIPDialer interface {
+	dial(ip net.IP, timeout time.Duration) bool
+}
+
+type egressIPDial struct{}
+
+var dialer egressIPDialer = &egressIPDial{}
+
+type healthcheckClientAllocator interface {
+	allocate(nodeName string) healthcheck.EgressIPHealthClient
+}
+
+// Blatant copy from: https://github.com/openshift/sdn/blob/master/pkg/network/common/egressip.go#L499-L505
+// Ping a node and return whether or not we think it is online. We do this by trying to
+// open a TCP connection to the "discard" service (port 9); if the node is offline, the
+// attempt will either time out with no response, or else return "no route to host" (and
+// we will return false). If the node is online then we presumably will get a "connection
+// refused" error; but the code below assumes that anything other than timeout or "no
+// route" indicates that the node is online.
+func (e *egressIPDial) dial(ip net.IP, timeout time.Duration) bool { + conn, err := net.DialTimeout("tcp", net.JoinHostPort(ip.String(), "9"), timeout) + if conn != nil { + conn.Close() + } + if opErr, ok := err.(*net.OpError); ok { + if opErr.Timeout() { + return false + } + if sysErr, ok := opErr.Err.(*os.SyscallError); ok && sysErr.Err == syscall.EHOSTUNREACH { + return false + } + } + return true +} + +var hccAllocator healthcheckClientAllocator = &egressIPHealthcheckClientAllocator{} + +// egressNode is a cache helper used for egress IP assignment, representing an egress node +type egressNode struct { + egressIPConfig *util.ParsedNodeEgressIPConfiguration + mgmtIPs []net.IP + allocations map[string]string + healthClient healthcheck.EgressIPHealthClient + isReady bool + isReachable bool + isEgressAssignable bool + name string +} + +func (e *egressNode) getAllocationCountForEgressIP(name string) (count int) { + for _, egressIPName := range e.allocations { + if egressIPName == name { + count++ + } + } + return +} + +// isAnyClusterNodeIP verifies that the IP is not any node IP. +func (eIPC *egressIPClusterController) isAnyClusterNodeIP(ip net.IP) *egressNode { + for _, eNode := range eIPC.allocator.cache { + if ip.Equal(eNode.egressIPConfig.V6.IP) || ip.Equal(eNode.egressIPConfig.V4.IP) { + return eNode + } + } + return nil +} + +type EgressIPPatchStatus struct { + Op string `json:"op"` + Path string `json:"path"` + Value egressipv1.EgressIPStatus `json:"value"` +} + +// patchReplaceEgressIPStatus performs a replace patch operation of the egress +// IP status by replacing the status with the provided value. This allows us to +// update only the status field, without overwriting any other. This is +// important because processing egress IPs can take a while (when running on a +// public cloud and in the worst case), hence we don't want to perform a full +// object update which risks resetting the EgressIP object's fields to the state +// they had when we started processing the change. +func (eIPC *egressIPClusterController) patchReplaceEgressIPStatus(name string, statusItems []egressipv1.EgressIPStatusItem) error { + klog.Infof("Patching status on EgressIP %s: %v", name, statusItems) + return retry.RetryOnConflict(retry.DefaultRetry, func() error { + t := []EgressIPPatchStatus{ + { + Op: "replace", + Path: "/status", + Value: egressipv1.EgressIPStatus{ + Items: statusItems, + }, + }, + } + op, err := json.Marshal(&t) + if err != nil { + return fmt.Errorf("error serializing status patch operation: %+v, err: %v", statusItems, err) + } + return eIPC.kube.PatchEgressIP(name, op) + }) +} + +func (eIPC *egressIPClusterController) getAllocationTotalCount() float64 { + count := 0 + eIPC.allocator.Lock() + defer eIPC.allocator.Unlock() + for _, eNode := range eIPC.allocator.cache { + count += len(eNode.allocations) + } + return float64(count) +} + +type allocator struct { + *sync.Mutex + // A cache used for egress IP assignments containing data for all cluster nodes + // used for egress IP assignments + cache map[string]*egressNode +} + +type cloudPrivateIPConfigOp struct { + toAdd string + toDelete string +} + +// ipStringToCloudPrivateIPConfigName converts the net.IP string representation +// to a CloudPrivateIPConfig compatible name. + +// The string representation of the IPv6 address fc00:f853:ccd:e793::54 will be +// represented as: fc00.f853.0ccd.e793.0000.0000.0000.0054 + +// We thus need to fully expand the IP string and replace every fifth +// character's colon with a dot. 
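+// For example (illustrative): an IPv4 address such as "10.244.0.5" is returned
+// unchanged, while "fc00:f853:ccd:e793::54" becomes
+// "fc00.f853.0ccd.e793.0000.0000.0000.0054".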
+func ipStringToCloudPrivateIPConfigName(ipString string) (name string) { + ip := net.ParseIP(ipString) + if ip.To4() != nil { + return ipString + } + dst := make([]byte, hex.EncodedLen(len(ip))) + hex.Encode(dst, ip) + for i := 0; i < len(dst); i += 4 { + if len(dst)-i == 4 { + name += string(dst[i : i+4]) + } else { + name += string(dst[i:i+4]) + "." + } + } + return +} + +func (eIPC *egressIPClusterController) executeCloudPrivateIPConfigOps(egressIPName string, ops map[string]*cloudPrivateIPConfigOp) error { + for egressIP, op := range ops { + cloudPrivateIPConfigName := ipStringToCloudPrivateIPConfigName(egressIP) + cloudPrivateIPConfig, err := eIPC.watchFactory.GetCloudPrivateIPConfig(cloudPrivateIPConfigName) + // toAdd and toDelete is non-empty, this indicates an UPDATE for which + // the object **must** exist, if not: that's an error. + if op.toAdd != "" && op.toDelete != "" { + if err != nil { + return fmt.Errorf("cloud update request failed for CloudPrivateIPConfig: %s, could not get item, err: %v", cloudPrivateIPConfigName, err) + } + // Do not update if object is being deleted + if !cloudPrivateIPConfig.GetDeletionTimestamp().IsZero() { + return fmt.Errorf("cloud update request failed, CloudPrivateIPConfig: %s is being deleted", cloudPrivateIPConfigName) + } + cloudPrivateIPConfig.Spec.Node = op.toAdd + if _, err := eIPC.kube.UpdateCloudPrivateIPConfig(cloudPrivateIPConfig); err != nil { + eIPRef := v1.ObjectReference{ + Kind: "EgressIP", + Name: egressIPName, + } + eIPC.recorder.Eventf(&eIPRef, v1.EventTypeWarning, "CloudUpdateFailed", "egress IP: %s for object EgressIP: %s could not be updated, err: %v", egressIP, egressIPName, err) + return fmt.Errorf("cloud update request failed for CloudPrivateIPConfig: %s, err: %v", cloudPrivateIPConfigName, err) + } + // toAdd is non-empty, this indicates an ADD + // if the object already exists for the specified node that's a no-op + // if the object already exists and the request is for a different node, that's an error + } else if op.toAdd != "" { + if err == nil { + if op.toAdd == cloudPrivateIPConfig.Spec.Node { + klog.Infof("CloudPrivateIPConfig: %s already assigned to node: %s", cloudPrivateIPConfigName, cloudPrivateIPConfig.Spec.Node) + continue + } + return fmt.Errorf("cloud create request failed for CloudPrivateIPConfig: %s, err: item exists", cloudPrivateIPConfigName) + } + cloudPrivateIPConfig := ocpcloudnetworkapi.CloudPrivateIPConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: cloudPrivateIPConfigName, + Annotations: map[string]string{ + util.OVNEgressIPOwnerRefLabel: egressIPName, + }, + }, + Spec: ocpcloudnetworkapi.CloudPrivateIPConfigSpec{ + Node: op.toAdd, + }, + } + if _, err := eIPC.kube.CreateCloudPrivateIPConfig(&cloudPrivateIPConfig); err != nil { + eIPRef := v1.ObjectReference{ + Kind: "EgressIP", + Name: egressIPName, + } + eIPC.recorder.Eventf(&eIPRef, v1.EventTypeWarning, "CloudAssignmentFailed", "egress IP: %s for object EgressIP: %s could not be created, err: %v", egressIP, egressIPName, err) + return fmt.Errorf("cloud add request failed for CloudPrivateIPConfig: %s, err: %v", cloudPrivateIPConfigName, err) + } + // toDelete is non-empty, this indicates a DELETE - if the object does not exist, log an Info message and continue with the next op. + // The reason for why we are not throwing an error here is that desired state (deleted) == isState (object not found). + // If for whatever reason we have a pending toDelete op for a deleted object, then this op should simply be silently ignored. 
+ // Any other error, return an error to trigger a retry. + } else if op.toDelete != "" { + if err != nil { + if apierrors.IsNotFound(err) { + klog.Infof("Cloud deletion request failed for CloudPrivateIPConfig: %s, item already deleted, err: %v", cloudPrivateIPConfigName, err) + continue + } else { + return fmt.Errorf("cloud deletion request failed for CloudPrivateIPConfig: %s, could not get item, err: %v", cloudPrivateIPConfigName, err) + } + } + if err := eIPC.kube.DeleteCloudPrivateIPConfig(cloudPrivateIPConfigName); err != nil { + eIPRef := v1.ObjectReference{ + Kind: "EgressIP", + Name: egressIPName, + } + eIPC.recorder.Eventf(&eIPRef, v1.EventTypeWarning, "CloudDeletionFailed", "egress IP: %s for object EgressIP: %s could not be deleted, err: %v", egressIP, egressIPName, err) + return fmt.Errorf("cloud deletion request failed for CloudPrivateIPConfig: %s, err: %v", cloudPrivateIPConfigName, err) + } + } + } + return nil +} + +// executeCloudPrivateIPConfigChange computes a diff between what needs to be +// assigned/removed and executes the object modification afterwards. +// Specifically: if one egress IP is moved from nodeA to nodeB, we actually care +// about an update on the CloudPrivateIPConfig object represented by that egress +// IP, cloudPrivateIPConfigOp is a helper used to determine that sort of +// operations from toAssign/toRemove +func (eIPC *egressIPClusterController) executeCloudPrivateIPConfigChange(egressIPName string, toAssign, toRemove []egressipv1.EgressIPStatusItem) error { + eIPC.pendingCloudPrivateIPConfigsMutex.Lock() + defer eIPC.pendingCloudPrivateIPConfigsMutex.Unlock() + ops := make(map[string]*cloudPrivateIPConfigOp, len(toAssign)+len(toRemove)) + for _, assignment := range toAssign { + ops[assignment.EgressIP] = &cloudPrivateIPConfigOp{ + toAdd: assignment.Node, + } + } + for _, removal := range toRemove { + if op, exists := ops[removal.EgressIP]; exists { + op.toDelete = removal.Node + } else { + ops[removal.EgressIP] = &cloudPrivateIPConfigOp{ + toDelete: removal.Node, + } + } + } + // Merge ops into the existing pendingCloudPrivateIPConfigsOps. + // This allows us to: + // a) execute only the new ops + // b) keep track of any pending changes + if len(ops) > 0 { + if _, ok := eIPC.pendingCloudPrivateIPConfigsOps[egressIPName]; !ok { + // Set all operations for the EgressIP object if none are in the cache currently. + eIPC.pendingCloudPrivateIPConfigsOps[egressIPName] = ops + } else { + for cloudPrivateIP, op := range ops { + if _, ok := eIPC.pendingCloudPrivateIPConfigsOps[egressIPName][cloudPrivateIP]; !ok { + // If this specific EgressIP object's CloudPrivateIPConfig address currently has no + // op, simply set it. + eIPC.pendingCloudPrivateIPConfigsOps[egressIPName][cloudPrivateIP] = op + } else { + // If an existing operation for this CloudPrivateIP exists, then the following logic should + // apply: + // If toDelete is currently set: keep the current toDelete. Theoretically, the oldest toDelete + // is the good one. If toDelete if currently not set, overwrite it with the new value. + // If toAdd is currently set: overwrite with the new toAdd. Theoretically, the newest toAdd is + // the good one. + // Therefore, only replace toAdd over a previously existing op and only replace toDelete if + // it's unset. 
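+					// Worked example of the above policy: merging a new op {toAdd: "node3"}
+					// into an existing op {toAdd: "node2", toDelete: "node1"} yields
+					// {toAdd: "node3", toDelete: "node1"}.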
+ if op.toAdd != "" { + eIPC.pendingCloudPrivateIPConfigsOps[egressIPName][cloudPrivateIP].toAdd = op.toAdd + } + if eIPC.pendingCloudPrivateIPConfigsOps[egressIPName][cloudPrivateIP].toDelete == "" { + eIPC.pendingCloudPrivateIPConfigsOps[egressIPName][cloudPrivateIP].toDelete = op.toDelete + } + } + } + } + } + return eIPC.executeCloudPrivateIPConfigOps(egressIPName, ops) +} + +type egressIPClusterController struct { + recorder record.EventRecorder + stopChan chan struct{} + wg *sync.WaitGroup + kube *kube.KubeOVN + // egressIPAssignmentMutex is used to ensure a safe updates between + // concurrent go-routines which could be modifying the egress IP status + // assignment simultaneously. Currently WatchEgressNodes and WatchEgressIP + // run two separate go-routines which do this. + egressIPAssignmentMutex *sync.Mutex + // pendingCloudPrivateIPConfigsMutex is used to ensure synchronized access + // to pendingCloudPrivateIPConfigsOps which is accessed by the egress IP and + // cloudPrivateIPConfig go-routines + pendingCloudPrivateIPConfigsMutex *sync.Mutex + // pendingCloudPrivateIPConfigsOps is a cache of pending + // CloudPrivateIPConfig changes that we are waiting on an answer for. Items + // in this map are only ever removed once the op is fully finished and we've + // been notified of this. That means: + // - On add operations we only delete once we've seen that the + // CloudPrivateIPConfig is fully added. + // - On delete: when it's fully deleted. + // - On update: once we finish processing the add - which comes after the + // delete. + pendingCloudPrivateIPConfigsOps map[string]map[string]*cloudPrivateIPConfigOp + // allocator is a cache of egress IP centric data needed to when both route + // health-checking and tracking allocations made + allocator allocator + // watchFactory watching k8s objects + watchFactory *factory.WatchFactory + // EgressIP Node reachability total timeout configuration + egressIPTotalTimeout int + // reachability check interval + reachabilityCheckInterval time.Duration + // EgressIP Node reachability gRPC port (0 means it should use dial instead) + egressIPNodeHealthCheckPort int + // retry framework for Egress nodes + retryEgressNodes *objretry.RetryFramework + // retry framework for egress IP + retryEgressIPs *objretry.RetryFramework + // retry framework for Cloud private IP config + retryCloudPrivateIPConfig *objretry.RetryFramework + // egressNodes events factory handler + egressNodeHandler *factory.Handler + // egressIP events factory handler + egressIPHandler *factory.Handler + // cloudPrivateIPConfig events factory handler + cloudPrivateIPConfigHandler *factory.Handler +} + +func newEgressIPController(ovnClient *util.OVNClusterManagerClientset, wf *factory.WatchFactory, recorder record.EventRecorder) *egressIPClusterController { + kube := &kube.KubeOVN{ + Kube: kube.Kube{KClient: ovnClient.KubeClient}, + EIPClient: ovnClient.EgressIPClient, + CloudNetworkClient: ovnClient.CloudNetworkClient, + } + wg := &sync.WaitGroup{} + eIPC := &egressIPClusterController{ + kube: kube, + wg: wg, + egressIPAssignmentMutex: &sync.Mutex{}, + pendingCloudPrivateIPConfigsMutex: &sync.Mutex{}, + pendingCloudPrivateIPConfigsOps: make(map[string]map[string]*cloudPrivateIPConfigOp), + allocator: allocator{&sync.Mutex{}, make(map[string]*egressNode)}, + watchFactory: wf, + recorder: recorder, + egressIPTotalTimeout: config.OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout, + reachabilityCheckInterval: egressIPReachabilityCheckInterval, + egressIPNodeHealthCheckPort: 
config.OVNKubernetesFeature.EgressIPNodeHealthCheckPort, + } + eIPC.initRetryFramework() + return eIPC +} + +func (eIPC *egressIPClusterController) initRetryFramework() { + eIPC.retryEgressNodes = eIPC.newRetryFramework(factory.EgressNodeType) + eIPC.retryEgressIPs = eIPC.newRetryFramework(factory.EgressIPType) + if util.PlatformTypeIsEgressIPCloudProvider() { + eIPC.retryCloudPrivateIPConfig = eIPC.newRetryFramework(factory.CloudPrivateIPConfigType) + } +} + +func (eIPC *egressIPClusterController) newRetryFramework(objectType reflect.Type) *objretry.RetryFramework { + eventHandler := &egressIPClusterControllerEventHandler{ + objType: objectType, + eIPC: eIPC, + syncFunc: nil, + } + resourceHandler := &objretry.ResourceHandler{ + HasUpdateFunc: true, // all egressIP types have update func + NeedsUpdateDuringRetry: true, // true for all egressIP types + ObjType: objectType, + EventHandler: eventHandler, + } + return objretry.NewRetryFramework(eIPC.stopChan, eIPC.wg, eIPC.watchFactory, resourceHandler) +} + +func (eIPC *egressIPClusterController) Start() error { + var err error + // In cluster manager, we only need to watch for egressNodes, egressIPs + // and cloudPrivateIPConfig + if eIPC.egressNodeHandler, err = eIPC.WatchEgressNodes(); err != nil { + return fmt.Errorf("unable to watch egress nodes %w", err) + } + if eIPC.egressIPHandler, err = eIPC.WatchEgressIP(); err != nil { + return err + } + if util.PlatformTypeIsEgressIPCloudProvider() { + if eIPC.cloudPrivateIPConfigHandler, err = eIPC.WatchCloudPrivateIPConfig(); err != nil { + return err + } + } + if config.OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout == 0 { + klog.V(2).Infof("EgressIP node reachability check disabled") + } else if config.OVNKubernetesFeature.EgressIPNodeHealthCheckPort != 0 { + klog.Infof("EgressIP node reachability enabled and using gRPC port %d", + config.OVNKubernetesFeature.EgressIPNodeHealthCheckPort) + } + return nil +} + +// WatchEgressNodes starts the watching of egress assignable nodes and calls +// back the appropriate handler logic. +func (eIPC *egressIPClusterController) WatchEgressNodes() (*factory.Handler, error) { + return eIPC.retryEgressNodes.WatchResource() +} + +// WatchCloudPrivateIPConfig starts the watching of cloudprivateipconfigs +// resource and calls back the appropriate handler logic. +func (eIPC *egressIPClusterController) WatchCloudPrivateIPConfig() (*factory.Handler, error) { + return eIPC.retryCloudPrivateIPConfig.WatchResource() +} + +// WatchEgressIP starts the watching of egressip resource and calls back the +// appropriate handler logic. It also initiates the other dedicated resource +// handlers for egress IP setup: namespaces, pods. 
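+// Note: in the cluster manager only the EgressIP objects themselves are watched
+// here; the namespace and pod handlers mentioned above are run by the
+// ovnkube-master side controller, as described in the commit message.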
+func (eIPC *egressIPClusterController) WatchEgressIP() (*factory.Handler, error) { + return eIPC.retryEgressIPs.WatchResource() +} + +func (eIPC *egressIPClusterController) Stop() { + close(eIPC.stopChan) + eIPC.wg.Wait() + if eIPC.egressNodeHandler != nil { + eIPC.watchFactory.RemoveNodeHandler(eIPC.egressNodeHandler) + } + if eIPC.egressIPHandler != nil { + eIPC.watchFactory.RemoveEgressIPHandler(eIPC.egressIPHandler) + } + if eIPC.cloudPrivateIPConfigHandler != nil { + eIPC.watchFactory.RemoveCloudPrivateIPConfigHandler(eIPC.cloudPrivateIPConfigHandler) + } +} + +type egressIPNodeStatus struct { + Node string + Name string +} + +// getSortedEgressData returns a sorted slice of all egressNodes based on the +// amount of allocations found in the cache +func (eIPC *egressIPClusterController) getSortedEgressData() ([]*egressNode, map[string]egressIPNodeStatus) { + assignableNodes := []*egressNode{} + allAllocations := make(map[string]egressIPNodeStatus) + for _, eNode := range eIPC.allocator.cache { + if eNode.isEgressAssignable && eNode.isReady && eNode.isReachable { + assignableNodes = append(assignableNodes, eNode) + } + for ip, eipName := range eNode.allocations { + allAllocations[ip] = egressIPNodeStatus{Node: eNode.name, Name: eipName} + } + } + sort.Slice(assignableNodes, func(i, j int) bool { + return len(assignableNodes[i].allocations) < len(assignableNodes[j].allocations) + }) + return assignableNodes, allAllocations +} + +func (eIPC *egressIPClusterController) initEgressNodeReachability(nodes []interface{}) error { + go eIPC.checkEgressNodesReachability() + return nil +} + +func (eIPC *egressIPClusterController) setNodeEgressAssignable(nodeName string, isAssignable bool) { + eIPC.allocator.Lock() + defer eIPC.allocator.Unlock() + if eNode, exists := eIPC.allocator.cache[nodeName]; exists { + eNode.isEgressAssignable = isAssignable + // if the node is not assignable/ready/reachable anymore we need to + // empty all of it's allocations from our cache since we'll clear all + // assignments from this node later on, because of this. + if !isAssignable { + eNode.allocations = make(map[string]string) + } + } +} + +func (eIPC *egressIPClusterController) isEgressNodeReady(egressNode *v1.Node) bool { + for _, condition := range egressNode.Status.Conditions { + if condition.Type == v1.NodeReady { + return condition.Status == v1.ConditionTrue + } + } + return false +} + +func isReachableLegacy(node string, mgmtIPs []net.IP, totalTimeout int) bool { + var retryTimeOut, initialRetryTimeOut time.Duration + + numMgmtIPs := len(mgmtIPs) + if numMgmtIPs == 0 { + return false + } + + switch totalTimeout { + // Check if we need to do node reachability check + case 0: + return true + case 1: + // Using time duration for initial retry with 700/numIPs msec and retry of 100/numIPs msec + // to ensure total wait time will be in range with the configured value including a sleep of 100msec between attempts. + initialRetryTimeOut = time.Duration(700/numMgmtIPs) * time.Millisecond + retryTimeOut = time.Duration(100/numMgmtIPs) * time.Millisecond + default: + // Using time duration for initial retry with 900/numIPs msec + // to ensure total wait time will be in range with the configured value including a sleep of 100msec between attempts. 
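+		// For example, with 2 management IPs (dual-stack) this gives 900/2 = 450 msec
+		// per dial attempt, so probing both IPs plus the 100 msec sleep keeps each
+		// iteration close to one second of the configured total timeout.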
+ initialRetryTimeOut = time.Duration(900/numMgmtIPs) * time.Millisecond + retryTimeOut = initialRetryTimeOut + } + + timeout := initialRetryTimeOut + endTime := time.Now().Add(time.Second * time.Duration(totalTimeout)) + for time.Now().Before(endTime) { + for _, ip := range mgmtIPs { + if dialer.dial(ip, timeout) { + return true + } + } + time.Sleep(100 * time.Millisecond) + timeout = retryTimeOut + } + klog.Errorf("Failed reachability check for %s", node) + return false +} + +// checkEgressNodesReachability continuously checks if all nodes used for egress +// IP assignment are reachable, and updates the nodes following the result. This +// is important because egress IP is based upon routing traffic to these nodes, +// and if they aren't reachable we shouldn't be using them for egress IP. +func (eIPC *egressIPClusterController) checkEgressNodesReachability() { + timer := time.NewTicker(eIPC.reachabilityCheckInterval) + defer timer.Stop() + for { + select { + case <-timer.C: + checkEgressNodesReachabilityIterate(eIPC) + case <-eIPC.stopChan: + klog.V(5).Infof("Stop channel got triggered: will stop checkEgressNodesReachability") + return + } + } +} + +func checkEgressNodesReachabilityIterate(eIPC *egressIPClusterController) { + reAddOrDelete := map[string]bool{} + eIPC.allocator.Lock() + for _, eNode := range eIPC.allocator.cache { + if eNode.isEgressAssignable && eNode.isReady { + wasReachable := eNode.isReachable + isReachable := eIPC.isReachable(eNode.name, eNode.mgmtIPs, eNode.healthClient) + if wasReachable && !isReachable { + reAddOrDelete[eNode.name] = true + } else if !wasReachable && isReachable { + reAddOrDelete[eNode.name] = false + } + eNode.isReachable = isReachable + } else { + // End connection (if there is one). This is important because + // it accounts for cases where node is not labelled with + // egress-assignable, so connection is no longer needed. Calling + // this on a already disconnected node is expected to be cheap. 
+ eNode.healthClient.Disconnect() + } + } + eIPC.allocator.Unlock() + for nodeName, shouldDelete := range reAddOrDelete { + if shouldDelete { + metrics.RecordEgressIPUnreachableNode() + klog.Warningf("Node: %s is detected as unreachable, deleting it from egress assignment", nodeName) + if err := eIPC.deleteEgressNode(nodeName); err != nil { + klog.Errorf("Node: %s is detected as unreachable, but could not re-assign egress IPs, err: %v", nodeName, err) + } + } else { + klog.Infof("Node: %s is detected as reachable and ready again, adding it to egress assignment", nodeName) + if err := eIPC.addEgressNode(nodeName); err != nil { + klog.Errorf("Node: %s is detected as reachable and ready again, but could not re-assign egress IPs, err: %v", nodeName, err) + } + } + } +} + +func (eIPC *egressIPClusterController) isReachable(nodeName string, mgmtIPs []net.IP, healthClient healthcheck.EgressIPHealthClient) bool { + // Check if we need to do node reachability check + if eIPC.egressIPTotalTimeout == 0 { + return true + } + + if eIPC.egressIPNodeHealthCheckPort == 0 { + return isReachableLegacy(nodeName, mgmtIPs, eIPC.egressIPTotalTimeout) + } + return isReachableViaGRPC(mgmtIPs, healthClient, eIPC.egressIPNodeHealthCheckPort, eIPC.egressIPTotalTimeout) +} + +func (eIPC *egressIPClusterController) isEgressNodeReachable(egressNode *v1.Node) bool { + eIPC.allocator.Lock() + defer eIPC.allocator.Unlock() + if eNode, exists := eIPC.allocator.cache[egressNode.Name]; exists { + return eNode.isReachable || eIPC.isReachable(eNode.name, eNode.mgmtIPs, eNode.healthClient) + } + return false +} + +func (eIPC *egressIPClusterController) setNodeEgressReady(nodeName string, isReady bool) { + eIPC.allocator.Lock() + defer eIPC.allocator.Unlock() + if eNode, exists := eIPC.allocator.cache[nodeName]; exists { + eNode.isReady = isReady + // see setNodeEgressAssignable + if !isReady { + eNode.allocations = make(map[string]string) + } + } +} + +func (eIPC *egressIPClusterController) setNodeEgressReachable(nodeName string, isReachable bool) { + eIPC.allocator.Lock() + defer eIPC.allocator.Unlock() + if eNode, exists := eIPC.allocator.cache[nodeName]; exists { + eNode.isReachable = isReachable + // see setNodeEgressAssignable + if !isReachable { + eNode.allocations = make(map[string]string) + } + } +} + +func (eIPC *egressIPClusterController) addEgressNode(nodeName string) error { + var errors []error + klog.V(5).Infof("Egress node: %s about to be initialized", nodeName) + + // If a node has been labelled for egress IP we need to check if there are any + // egress IPs which are missing an assignment. If there are, we need to send a + // synthetic update since reconcileEgressIP will then try to assign those IPs to + // this node (if possible) + egressIPs, err := eIPC.kube.GetEgressIPs() + if err != nil { + return fmt.Errorf("unable to list EgressIPs, err: %v", err) + } + for _, egressIP := range egressIPs.Items { + if len(egressIP.Spec.EgressIPs) != len(egressIP.Status.Items) { + // Send a "synthetic update" on all egress IPs which are not fully + // assigned, the reconciliation loop for WatchEgressIP will try to + // assign stuff to this new node. The workqueue's delta FIFO + // implementation will not trigger a watch event for updates on + // objects which have no semantic difference, hence: call the + // reconciliation function directly. 
+ if err := eIPC.reconcileEgressIP(nil, &egressIP); err != nil {
+ errors = append(errors, fmt.Errorf("synthetic update for EgressIP: %s failed, err: %v", egressIP.Name, err))
+ }
+ }
+ }
+
+ if len(errors) > 0 {
+ return utilerrors.NewAggregate(errors)
+ }
+ return nil
+}
+
+// deleteNodeForEgress disconnects the node's health check client and removes
+// the node from the allocator cache.
+func (eIPC *egressIPClusterController) deleteNodeForEgress(node *v1.Node) {
+ eIPC.allocator.Lock()
+ if eNode, exists := eIPC.allocator.cache[node.Name]; exists {
+ eNode.healthClient.Disconnect()
+ }
+ delete(eIPC.allocator.cache, node.Name)
+ eIPC.allocator.Unlock()
+}
+
+func (eIPC *egressIPClusterController) deleteEgressNode(nodeName string) error {
+ var errorAggregate []error
+ klog.V(5).Infof("Egress node: %s about to be removed", nodeName)
+ // Since the node has been labelled as "not usable" for egress IP
+ // assignments we need to find all egress IPs which have an assignment to
+ // it, and move them elsewhere.
+ egressIPs, err := eIPC.kube.GetEgressIPs()
+ if err != nil {
+ return fmt.Errorf("unable to list EgressIPs, err: %v", err)
+ }
+ for _, egressIP := range egressIPs.Items {
+ for _, status := range egressIP.Status.Items {
+ if status.Node == nodeName {
+ // Send a "synthetic update" on all egress IPs which have an
+ // assignment to this node. The reconciliation loop for
+ // WatchEgressIP will see that the current assignment status to
+ // this node is invalid and try to re-assign elsewhere. The
+ // workqueue's delta FIFO implementation will not trigger a
+ // watch event for updates on objects which have no semantic
+ // difference, hence: call the reconciliation function directly.
+ if err := eIPC.reconcileEgressIP(nil, &egressIP); err != nil {
+ errorAggregate = append(errorAggregate, fmt.Errorf("re-assignment for EgressIP: %s failed, unable to update object, err: %v", egressIP.Name, err))
+ }
+ break
+ }
+ }
+ }
+ if len(errorAggregate) > 0 {
+ return utilerrors.NewAggregate(errorAggregate)
+ }
+ return nil
+}
+
+func (eIPC *egressIPClusterController) initEgressIPAllocator(node *v1.Node) (err error) {
+ eIPC.allocator.Lock()
+ defer eIPC.allocator.Unlock()
+ if _, exists := eIPC.allocator.cache[node.Name]; !exists {
+ var parsedEgressIPConfig *util.ParsedNodeEgressIPConfiguration
+ if util.PlatformTypeIsEgressIPCloudProvider() {
+ parsedEgressIPConfig, err = util.ParseCloudEgressIPConfig(node)
+ if err != nil {
+ return fmt.Errorf("unable to use cloud node for egress assignment, err: %v", err)
+ }
+ } else {
+ parsedEgressIPConfig, err = util.ParseNodePrimaryIfAddr(node)
+ if err != nil {
+ return fmt.Errorf("unable to use node for egress assignment, err: %v", err)
+ }
+ }
+ nodeSubnets, err := util.ParseNodeHostSubnetAnnotation(node, types.DefaultNetworkName)
+ if err != nil {
+ return fmt.Errorf("failed to parse node %s subnets annotation: %v", node.Name, err)
+ }
+ mgmtIPs := make([]net.IP, len(nodeSubnets))
+ for i, subnet := range nodeSubnets {
+ mgmtIPs[i] = util.GetNodeManagementIfAddr(subnet).IP
+ }
+ eIPC.allocator.cache[node.Name] = &egressNode{
+ name: node.Name,
+ egressIPConfig: parsedEgressIPConfig,
+ mgmtIPs: mgmtIPs,
+ allocations: make(map[string]string),
+ healthClient: hccAllocator.allocate(node.Name),
+ }
+ }
+ return nil
+}
+
+// deleteAllocatorEgressIPAssignments deletes the allocations so as to keep the
+// cache state correct; see also addAllocatorEgressIPAssignments
+func (eIPC *egressIPClusterController)
deleteAllocatorEgressIPAssignments(statusAssignments []egressipv1.EgressIPStatusItem) {
+ eIPC.allocator.Lock()
+ defer eIPC.allocator.Unlock()
+ for _, status := range statusAssignments {
+ if eNode, exists := eIPC.allocator.cache[status.Node]; exists {
+ delete(eNode.allocations, status.EgressIP)
+ }
+ }
+}
+
+// deleteAllocatorEgressIPAssignmentIfExists deletes egressIP config from node allocations map
+// if the entry is available and returns the assigned node name, otherwise returns an empty string.
+func (eIPC *egressIPClusterController) deleteAllocatorEgressIPAssignmentIfExists(name, egressIP string) string {
+ eIPC.allocator.Lock()
+ defer eIPC.allocator.Unlock()
+ for nodeName, eNode := range eIPC.allocator.cache {
+ if egressIPName, exists := eNode.allocations[egressIP]; exists && egressIPName == name {
+ delete(eNode.allocations, egressIP)
+ return nodeName
+ }
+ }
+ return ""
+}
+
+// addAllocatorEgressIPAssignments adds the allocations to the cache, so that
+// they are tracked during the life-cycle of ovnkube-master
+func (eIPC *egressIPClusterController) addAllocatorEgressIPAssignments(name string, statusAssignments []egressipv1.EgressIPStatusItem) {
+ eIPC.allocator.Lock()
+ defer eIPC.allocator.Unlock()
+ for _, status := range statusAssignments {
+ if eNode, exists := eIPC.allocator.cache[status.Node]; exists {
+ eNode.allocations[status.EgressIP] = name
+ }
+ }
+}
+
+func (eIPC *egressIPClusterController) reconcileEgressIP(old, new *egressipv1.EgressIP) (err error) {
+ // Lock the assignment: this is needed because this function can end up
+ // being called from WatchEgressNodes and WatchEgressIP, i.e: two different
+ // go-routines, and we need to make sure the assignment is safe.
+ eIPC.egressIPAssignmentMutex.Lock()
+ defer eIPC.egressIPAssignmentMutex.Unlock()
+
+ name := ""
+
+ // Initialize a status which will be used to compare against
+ // new.spec.egressIPs and decide on what from the status should get deleted
+ // or kept.
+ status := []egressipv1.EgressIPStatusItem{}
+
+ // Initialize an empty object to avoid a SIGSEGV. The code should play
+ // nicely with empty objects though.
+ newEIP := &egressipv1.EgressIP{}
+
+ // Initialize a sets.String which holds egress IPs that were not fully assigned
+ // but are allocated and are meant to be removed.
+ staleEgressIPs := sets.NewString()
+ if old != nil {
+ name = old.Name
+ status = old.Status.Items
+ staleEgressIPs.Insert(old.Spec.EgressIPs...)
+ }
+ if new != nil {
+ newEIP = new
+ name = newEIP.Name
+ status = newEIP.Status.Items
+ if staleEgressIPs.Len() > 0 {
+ for _, egressIP := range newEIP.Spec.EgressIPs {
+ if staleEgressIPs.Has(egressIP) {
+ staleEgressIPs.Delete(egressIP)
+ }
+ }
+ }
+ }
+
+ // Validate the spec and use only the valid egress IPs when performing any
+ // successive operations, theoretically: the user could specify invalid IP
+ // addresses, which would break us.
+ validSpecIPs, err := eIPC.validateEgressIPSpec(name, newEIP.Spec.EgressIPs)
+ if err != nil {
+ return fmt.Errorf("invalid EgressIP spec, err: %v", err)
+ }
+
+ // Validate the status, on restart it could be the case that what might have
+ // been assigned when ovnkube-master last ran is not a valid assignment
+ // anymore (specifically if ovnkube-master has been crashing for a while).
+ // Any invalid status at this point in time needs to be removed and assigned
+ // to a valid node.
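+ // A status item is considered invalid if, for example, its node has lost
+ // the egress-assignable label, is not ready or reachable, or its subnet
+ // can no longer host the IP; validateEgressIPStatus below performs the
+ // exact checks.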
+ validStatus, invalidStatus := eIPC.validateEgressIPStatus(name, status)
+ for status := range validStatus {
+ // If the spec has changed and an egress IP has been removed by the
+ // user: we need to un-assign that egress IP
+ if !validSpecIPs.Has(status.EgressIP) {
+ invalidStatus[status] = ""
+ delete(validStatus, status)
+ }
+ }
+
+ invalidStatusLen := len(invalidStatus)
+ if invalidStatusLen > 0 {
+ metrics.RecordEgressIPRebalance(invalidStatusLen)
+ }
+
+ // Add only the diff between what is requested and valid and that which
+ // isn't already assigned.
+ ipsToAssign := validSpecIPs
+ ipsToRemove := sets.New[string]()
+ statusToAdd := make([]egressipv1.EgressIPStatusItem, 0, len(ipsToAssign))
+ statusToKeep := make([]egressipv1.EgressIPStatusItem, 0, len(validStatus))
+ for status := range validStatus {
+ statusToKeep = append(statusToKeep, status)
+ ipsToAssign.Delete(status.EgressIP)
+ }
+ statusToRemove := make([]egressipv1.EgressIPStatusItem, 0, invalidStatusLen)
+ for status := range invalidStatus {
+ statusToRemove = append(statusToRemove, status)
+ ipsToRemove.Insert(status.EgressIP)
+ }
+ if ipsToRemove.Len() > 0 {
+ // The following ensures that we only add after having
+ // successfully removed egress IPs. This case is not very important on
+ // bare-metal (since we execute the add after the remove below, and
+ // hence have full control of the execution - barring its success), but
+ // on a cloud: we patch all valid statuses below, we wait for the status
+ // on the CloudPrivateIPConfig(s) we create to be set before executing
+ // anything in the OVN DB (Note that the status will be set by this
+ // controller in cluster-manager and asynchronously the ovnkube-master
+ // will read the CRD change and do the necessary plumbing (ADD/UPDATE/DELETE)
+ // in the OVN DB).
+ // So, we need to make sure that we delete and
+ // then add, mainly because if EIP1 is added to nodeX and then EIP2 is
+ // removed from nodeX, we might remove the setup made for EIP1. The
+ // add/delete ordering of events is not guaranteed on the cloud where we
+ // depend on other controllers to execute the work for us however. By
+ // comparing the spec to the status and applying the following truth
+ // table we can ensure the correct order of events.
+
+ // case ID | Egress IP to add | Egress IP to remove | ipsToAssign
+ // 1 | e1 | e1 | e1
+ // 2 | e2 | e1 | -
+ // 3 | e2 | - | e2
+ // 4 | - | e1 | -
+
+ // Case 1 handles updates. Cases 2 and 3 make sure we don't add until we
+ // successfully delete. Case 4 just shows an example of what would
+ // happen if we don't have anything to add
+ ipsToAssign = ipsToAssign.Intersection(ipsToRemove)
+ }
+
+ if !util.PlatformTypeIsEgressIPCloudProvider() {
+ if len(statusToRemove) > 0 {
+ // Delete the statusToRemove from the allocator cache. If we don't
+ // do this we will occupy assignment positions for the ipsToAssign,
+ // even though statusToRemove will be removed afterwards
+ eIPC.deleteAllocatorEgressIPAssignments(statusToRemove)
+ }
+ if len(ipsToAssign) > 0 {
+ statusToAdd = eIPC.assignEgressIPs(name, ipsToAssign.UnsortedList())
+ statusToKeep = append(statusToKeep, statusToAdd...)
+ }
+ // Add all assignments which are to be kept to the allocator cache,
+ // allowing us to track all assignments which have been performed and
+ // avoid incorrect future assignments due to a de-synchronized cache.
+ eIPC.addAllocatorEgressIPAssignments(name, statusToKeep)
+ // Update the object only on an ADD/UPDATE.
If we are processing a + // DELETE, new will be nil and we should not update the object. + if len(statusToAdd) > 0 || (len(statusToRemove) > 0 && new != nil) { + if err := eIPC.patchReplaceEgressIPStatus(name, statusToKeep); err != nil { + return err + } + } + } else { + // Even when running on a public cloud, we must make sure that we unwire EgressIP + // configuration from OVN *before* we instruct the CloudNetworkConfigController + // to remove the CloudPrivateIPConfig object from the cloud. + // CloudPrivateIPConfig objects can be in the "Deleting" state for a long time, + // waiting for the underlying cloud to finish its action and to report success of the + // unattach operation. Some clouds such as Azure will remove the IP address nearly + // immediately, but then they will take a long time (seconds to minutes) to actually report + // success of the removal operation. + if len(statusToRemove) > 0 { + // Delete all assignments that are to be removed from the allocator + // cache. If we don't do this we will occupy assignment positions for + // the ipsToAdd, even though statusToRemove will be removed afterwards + eIPC.deleteAllocatorEgressIPAssignments(statusToRemove) + // Before updating the cloud private IP object, we need to remove the OVN configuration + // for these invalid statuses so that traffic is not blackholed to non-existing setup in the + // cloud. Thus we patch the egressIP status with the valid set of statuses which will + // trigger an event for the ovnkube-master to take action upon. + // Note that once we figure out the statusToAdd parts below we will trigger an + // update to cloudPrivateIP object which will trigger another patch for the eIP object. + if err := eIPC.patchReplaceEgressIPStatus(name, statusToKeep); err != nil { + return err + } + } + // When egress IP is not fully assigned to a node, then statusToRemove may not + // have those entries, hence retrieve it from staleEgressIPs for removing + // the item from cloudprivateipconfig. + for _, toRemove := range statusToRemove { + if !staleEgressIPs.Has(toRemove.EgressIP) { + continue + } + staleEgressIPs.Delete(toRemove.EgressIP) + } + for staleEgressIP := range staleEgressIPs { + if nodeName := eIPC.deleteAllocatorEgressIPAssignmentIfExists(name, staleEgressIP); nodeName != "" { + statusToRemove = append(statusToRemove, + egressipv1.EgressIPStatusItem{EgressIP: staleEgressIP, Node: nodeName}) + } + } + // If running on a public cloud we should not program OVN just yet for assignment + // operations. We need confirmation from the cloud-network-config-controller that + // it can assign the IPs. reconcileCloudPrivateIPConfig will take care of + // processing the answer from the requests we make here, and update OVN + // accordingly when we know what the outcome is. + if len(ipsToAssign) > 0 { + statusToAdd = eIPC.assignEgressIPs(name, ipsToAssign.UnsortedList()) + statusToKeep = append(statusToKeep, statusToAdd...) + } + // Same as above: Add all assignments which are to be kept to the + // allocator cache, allowing us to track all assignments which have been + // performed and avoid incorrect future assignments due to a + // de-synchronized cache. + eIPC.addAllocatorEgressIPAssignments(name, statusToKeep) + + // Execute CloudPrivateIPConfig changes for assignments which need to be + // added/removed, assignments which don't change do not require any + // further setup. 
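+ // Illustrative example (assumed flow, not part of this change): moving
+ // 10.0.0.5 from nodeA to nodeB yields statusToRemove=[{nodeA, 10.0.0.5}]
+ // and statusToAdd=[{nodeB, 10.0.0.5}]; per the ordering rules above, the
+ // delete is issued before the add, so the cloud never holds two
+ // attachments for the same private IP.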
+ if err := eIPC.executeCloudPrivateIPConfigChange(name, statusToAdd, statusToRemove); err != nil {
+ return err
+ }
+ }
+
+ // Record the egress IP allocator count
+ metrics.RecordEgressIPCount(eIPC.getAllocationTotalCount())
+ return nil
+}
+
+// assignEgressIPs is the main assignment algorithm for egress IPs to nodes.
+// Specifically we have a couple of hard constraints: a) the subnet of the node
+// must be able to host the egress IP b) the egress IP cannot be a node IP c)
+// the IP cannot already be assigned and referenced by another EgressIP object d)
+// no two egress IPs for the same EgressIP object can be assigned to the same
+// node e) (for public clouds) the number of egress IPs assigned to one node
+// must respect its assignment capacity. Moreover there is a soft constraint:
+// the assignments need to be balanced across all cluster nodes, so that no node
+// becomes a bottleneck. The balancing is achieved by sorting the nodes in
+// ascending order following their existing number of allocations, and trying to
+// assign the egress IP to the node with the lowest number of allocations every
+// time; this does not guarantee complete balance, but comes close.
+func (eIPC *egressIPClusterController) assignEgressIPs(name string, egressIPs []string) []egressipv1.EgressIPStatusItem {
+ eIPC.allocator.Lock()
+ defer eIPC.allocator.Unlock()
+ assignments := []egressipv1.EgressIPStatusItem{}
+ assignableNodes, existingAllocations := eIPC.getSortedEgressData()
+ if len(assignableNodes) == 0 {
+ eIPRef := v1.ObjectReference{
+ Kind: "EgressIP",
+ Name: name,
+ }
+ eIPC.recorder.Eventf(&eIPRef, v1.EventTypeWarning, "NoMatchingNodeFound", "no assignable nodes for EgressIP: %s, please tag at least one node with label: %s", name, util.GetNodeEgressLabel())
+ klog.Errorf("No assignable nodes found for EgressIP: %s and requested IPs: %v", name, egressIPs)
+ return assignments
+ }
+ klog.V(5).Infof("Current assignments are: %+v", existingAllocations)
+ for _, egressIP := range egressIPs {
+ klog.V(5).Infof("Will attempt assignment for egress IP: %s", egressIP)
+ eIP := net.ParseIP(egressIP)
+ if status, exists := existingAllocations[eIP.String()]; exists {
+ // On public clouds we will re-process assignments for the same IP
+ // multiple times due to the nature of syncing each individual
+ // CloudPrivateIPConfig one at a time. This means that we are
+ // expected to end up in this situation multiple times per sync. Ex:
+ // Say an EgressIP is created with IP1, IP2, IP3. We begin by
+ // assigning them all the first round. Next we get the
+ // CloudPrivateIPConfig confirming the addition of IP1, leading us
+ // to re-assign IP2, IP3, but since we've already assigned them
+ // we'll end up here. This is not an error.
What would be an error
+ // is if the user created EIP1 with IP1 and a second EIP2 with IP1
+ if name == status.Name {
+ // IP is already assigned for this EgressIP object
+ assignments = append(assignments, egressipv1.EgressIPStatusItem{
+ Node: status.Node,
+ EgressIP: eIP.String(),
+ })
+ continue
+ } else {
+ klog.Errorf("IP: %q for EgressIP: %s is already allocated for EgressIP: %s on %s", egressIP, name, status.Name, status.Node)
+ return assignments
+ }
+ }
+ if node := eIPC.isAnyClusterNodeIP(eIP); node != nil {
+ eIPRef := v1.ObjectReference{
+ Kind: "EgressIP",
+ Name: name,
+ }
+ eIPC.recorder.Eventf(
+ &eIPRef,
+ v1.EventTypeWarning,
+ "UnsupportedRequest",
+ "Egress IP: %v for object EgressIP: %s is the IP address of node: %s, this is unsupported", eIP, name, node.name,
+ )
+ klog.Errorf("Egress IP: %v is the IP address of node: %s", eIP, node.name)
+ return assignments
+ }
+ for _, eNode := range assignableNodes {
+ klog.V(5).Infof("Attempting assignment on egress node: %+v", eNode)
+ if eNode.getAllocationCountForEgressIP(name) > 0 {
+ klog.V(5).Infof("Node: %s is already in use by another egress IP for this EgressIP: %s, trying another node", eNode.name, name)
+ continue
+ }
+ if eNode.egressIPConfig.Capacity.IP < util.UnlimitedNodeCapacity {
+ if eNode.egressIPConfig.Capacity.IP-len(eNode.allocations) <= 0 {
+ klog.V(5).Infof("Additional allocation on Node: %s exhausts its IP capacity, trying another node", eNode.name)
+ continue
+ }
+ }
+ if eNode.egressIPConfig.Capacity.IPv4 < util.UnlimitedNodeCapacity && utilnet.IsIPv4(eIP) {
+ if eNode.egressIPConfig.Capacity.IPv4-getIPFamilyAllocationCount(eNode.allocations, false) <= 0 {
+ klog.V(5).Infof("Additional allocation on Node: %s exhausts its IPv4 capacity, trying another node", eNode.name)
+ continue
+ }
+ }
+ if eNode.egressIPConfig.Capacity.IPv6 < util.UnlimitedNodeCapacity && utilnet.IsIPv6(eIP) {
+ if eNode.egressIPConfig.Capacity.IPv6-getIPFamilyAllocationCount(eNode.allocations, true) <= 0 {
+ klog.V(5).Infof("Additional allocation on Node: %s exhausts its IPv6 capacity, trying another node", eNode.name)
+ continue
+ }
+ }
+ if (eNode.egressIPConfig.V6.Net != nil && eNode.egressIPConfig.V6.Net.Contains(eIP)) ||
+ (eNode.egressIPConfig.V4.Net != nil && eNode.egressIPConfig.V4.Net.Contains(eIP)) {
+ assignments = append(assignments, egressipv1.EgressIPStatusItem{
+ Node: eNode.name,
+ EgressIP: eIP.String(),
+ })
+ klog.Infof("Successful assignment of egress IP: %s on node: %+v", egressIP, eNode)
+ eNode.allocations[eIP.String()] = name
+ break
+ }
+ }
+ }
+ if len(assignments) == 0 {
+ eIPRef := v1.ObjectReference{
+ Kind: "EgressIP",
+ Name: name,
+ }
+ eIPC.recorder.Eventf(&eIPRef, v1.EventTypeWarning, "NoMatchingNodeFound", "No matching nodes found, which can host any of the egress IPs: %v for object EgressIP: %s", egressIPs, name)
+ klog.Errorf("No matching host found for EgressIP: %s", name)
+ return assignments
+ }
+ if len(assignments) < len(egressIPs) {
+ eIPRef := v1.ObjectReference{
+ Kind: "EgressIP",
+ Name: name,
+ }
+ eIPC.recorder.Eventf(&eIPRef, v1.EventTypeWarning, "UnassignedRequest", "Not all egress IPs for EgressIP: %s could be assigned, please tag more nodes", name)
+ }
+ return assignments
+}
+
+func getIPFamilyAllocationCount(allocations map[string]string, isIPv6 bool) (count int) {
+ for allocation := range allocations {
+ if utilnet.IsIPv4String(allocation) && !isIPv6 {
+ count++
+ }
+ if utilnet.IsIPv6String(allocation) && isIPv6 {
+ count++
+ }
+ }
+ return
+}
+
+func (eIPC
*egressIPClusterController) validateEgressIPSpec(name string, egressIPs []string) (sets.Set[string], error) {
+ validatedEgressIPs := sets.New[string]()
+ for _, egressIP := range egressIPs {
+ ip := net.ParseIP(egressIP)
+ if ip == nil {
+ eIPRef := v1.ObjectReference{
+ Kind: "EgressIP",
+ Name: name,
+ }
+ eIPC.recorder.Eventf(&eIPRef, v1.EventTypeWarning, "InvalidEgressIP", "egress IP: %s for object EgressIP: %s is not a valid IP address", egressIP, name)
+ return nil, fmt.Errorf("unable to parse provided EgressIP: %s, invalid", egressIP)
+ }
+ validatedEgressIPs.Insert(ip.String())
+ }
+ return validatedEgressIPs, nil
+}
+
+// validateEgressIPStatus validates if the statuses are valid given what the
+// cache knows about all egress nodes. WatchEgressNodes is initialized before
+// any other egress IP handler, so the cache should be warm and correct once we
+// start doing this.
+func (eIPC *egressIPClusterController) validateEgressIPStatus(name string, items []egressipv1.EgressIPStatusItem) (map[egressipv1.EgressIPStatusItem]string, map[egressipv1.EgressIPStatusItem]string) {
+ eIPC.allocator.Lock()
+ defer eIPC.allocator.Unlock()
+ valid, invalid := make(map[egressipv1.EgressIPStatusItem]string), make(map[egressipv1.EgressIPStatusItem]string)
+ for _, eIPStatus := range items {
+ validAssignment := true
+ eNode, exists := eIPC.allocator.cache[eIPStatus.Node]
+ if !exists {
+ klog.Errorf("Allocator error: EgressIP: %s claims to have an allocation on node: %s, which is unassignable for egress IP", name, eIPStatus.Node)
+ validAssignment = false
+ } else {
+ if eNode.getAllocationCountForEgressIP(name) > 1 {
+ klog.Errorf("Allocator error: EgressIP: %s claims multiple egress IPs on same node: %s, will attempt rebalancing", name, eIPStatus.Node)
+ validAssignment = false
+ }
+ if !eNode.isEgressAssignable {
+ klog.Errorf("Allocator error: EgressIP: %s assigned to node: %s which does not have egress label, will attempt rebalancing", name, eIPStatus.Node)
+ validAssignment = false
+ }
+ if !eNode.isReachable {
+ klog.Errorf("Allocator error: EgressIP: %s assigned to node: %s which is not reachable, will attempt rebalancing", name, eIPStatus.Node)
+ validAssignment = false
+ }
+ if !eNode.isReady {
+ klog.Errorf("Allocator error: EgressIP: %s assigned to node: %s which is not ready, will attempt rebalancing", name, eIPStatus.Node)
+ validAssignment = false
+ }
+ ip := net.ParseIP(eIPStatus.EgressIP)
+ if ip == nil {
+ klog.Errorf("Allocator error: EgressIP allocation contains unparsable IP address: %s", eIPStatus.EgressIP)
+ validAssignment = false
+ }
+ if node := eIPC.isAnyClusterNodeIP(ip); node != nil {
+ klog.Errorf("Allocator error: EgressIP allocation: %s is the IP of node: %s", ip.String(), node.name)
+ validAssignment = false
+ }
+ if utilnet.IsIPv6(ip) && eNode.egressIPConfig.V6.Net != nil {
+ if !eNode.egressIPConfig.V6.Net.Contains(ip) {
+ klog.Errorf("Allocator error: EgressIP allocation: %s on subnet: %s which cannot host it", ip.String(), eNode.egressIPConfig.V6.Net.String())
+ validAssignment = false
+ }
+ } else if !utilnet.IsIPv6(ip) && eNode.egressIPConfig.V4.Net != nil {
+ if !eNode.egressIPConfig.V4.Net.Contains(ip) {
+ klog.Errorf("Allocator error: EgressIP allocation: %s on subnet: %s which cannot host it", ip.String(), eNode.egressIPConfig.V4.Net.String())
+ validAssignment = false
+ }
+ } else {
+ klog.Errorf("Allocator error: EgressIP allocation on node: %s which does not support its IP protocol version", eIPStatus.Node)
+ validAssignment = false
+ }
+ }
+ if
validAssignment { + valid[eIPStatus] = "" + } else { + invalid[eIPStatus] = "" + } + } + return valid, invalid +} + +func (eIPC *egressIPClusterController) reconcileCloudPrivateIPConfig(old, new *ocpcloudnetworkapi.CloudPrivateIPConfig) error { + oldCloudPrivateIPConfig, newCloudPrivateIPConfig := &ocpcloudnetworkapi.CloudPrivateIPConfig{}, &ocpcloudnetworkapi.CloudPrivateIPConfig{} + shouldDelete, shouldAdd := false, false + nodeToDelete := "" + + if old != nil { + oldCloudPrivateIPConfig = old + // We need to handle three types of deletes, A) object UPDATE where the + // old egress IP <-> node assignment has been removed. This is indicated + // by the old object having a .status.node set and the new object having + // .status.node empty and the condition on the new being successful. B) + // object UPDATE where egress IP <-> node assignment has been updated. + // This is indicated by .status.node being different on old and new + // objects. C) object DELETE, for which new is nil + shouldDelete = oldCloudPrivateIPConfig.Status.Node != "" || new == nil + // On DELETE we need to delete the .spec.node for the old object + nodeToDelete = oldCloudPrivateIPConfig.Spec.Node + } + if new != nil { + newCloudPrivateIPConfig = new + // We should only proceed to setting things up for objects where the new + // object has the same .spec.node and .status.node, and assignment + // condition being true. This is how the cloud-network-config-controller + // indicates a successful cloud assignment. + shouldAdd = newCloudPrivateIPConfig.Status.Node == newCloudPrivateIPConfig.Spec.Node && + ocpcloudnetworkapi.CloudPrivateIPConfigConditionType(newCloudPrivateIPConfig.Status.Conditions[0].Type) == ocpcloudnetworkapi.Assigned && + v1.ConditionStatus(newCloudPrivateIPConfig.Status.Conditions[0].Status) == v1.ConditionTrue + // See above explanation for the delete + shouldDelete = shouldDelete && + (newCloudPrivateIPConfig.Status.Node == "" || newCloudPrivateIPConfig.Status.Node != oldCloudPrivateIPConfig.Status.Node) && + ocpcloudnetworkapi.CloudPrivateIPConfigConditionType(newCloudPrivateIPConfig.Status.Conditions[0].Type) == ocpcloudnetworkapi.Assigned && + v1.ConditionStatus(newCloudPrivateIPConfig.Status.Conditions[0].Status) == v1.ConditionTrue + // On UPDATE we need to delete the old .status.node + if shouldDelete { + nodeToDelete = oldCloudPrivateIPConfig.Status.Node + } + } + + // As opposed to reconcileEgressIP, here we are only interested in changes + // made to the status (since we are the only ones performing the change made + // to the spec). So don't process the object if there is no change made to + // the status. + if reflect.DeepEqual(oldCloudPrivateIPConfig.Status, newCloudPrivateIPConfig.Status) { + return nil + } + + if shouldDelete { + // Get the EgressIP owner reference + egressIPName, exists := oldCloudPrivateIPConfig.Annotations[util.OVNEgressIPOwnerRefLabel] + if !exists { + // If a CloudPrivateIPConfig object does not have an egress IP owner reference annotation upon deletion, + // there is no way that the object will get one after deletion. Hence, simply log a warning message here + // for informative purposes instead of throwing the same error and retrying time and time again. 
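+ // (util.OVNEgressIPOwnerRefLabel is the annotation presumably stamped
+ // on every CloudPrivateIPConfig when it is created by ovn-kubernetes;
+ // an object without it was created by something else.)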
+ klog.Warningf("CloudPrivateIPConfig object %q was missing the egress IP owner reference annotation "+ + "upon deletion", oldCloudPrivateIPConfig.Name) + return nil + } + // Check if the egress IP has been deleted or not, if we are processing + // a CloudPrivateIPConfig delete because the EgressIP has been deleted + // then we need to remove the setup made for it, but not update the + // object. + egressIP, err := eIPC.kube.GetEgressIP(egressIPName) + isDeleted := apierrors.IsNotFound(err) + if err != nil && !isDeleted { + return err + } + egressIPString := cloudPrivateIPConfigNameToIPString(oldCloudPrivateIPConfig.Name) + statusItem := egressipv1.EgressIPStatusItem{ + Node: nodeToDelete, + EgressIP: egressIPString, + } + // If we are not processing a delete, update the EgressIP object's + // status assignments + if !isDeleted { + // Deleting a status here means updating the object with the statuses we + // want to keep + updatedStatus := []egressipv1.EgressIPStatusItem{} + for _, status := range egressIP.Status.Items { + if !reflect.DeepEqual(status, statusItem) { + updatedStatus = append(updatedStatus, status) + } + } + if err := eIPC.patchReplaceEgressIPStatus(egressIP.Name, updatedStatus); err != nil { + return err + } + } + resyncEgressIPs, err := eIPC.removePendingOpsAndGetResyncs(egressIPName, egressIPString) + if err != nil { + return err + } + for _, resyncEgressIP := range resyncEgressIPs { + if err := eIPC.reconcileEgressIP(nil, &resyncEgressIP); err != nil { + return fmt.Errorf("synthetic update for EgressIP: %s failed, err: %v", egressIP.Name, err) + } + } + } + if shouldAdd { + // Get the EgressIP owner reference + egressIPName, exists := newCloudPrivateIPConfig.Annotations[util.OVNEgressIPOwnerRefLabel] + if !exists { + // If a CloudPrivateIPConfig object does not have an egress IP owner reference annotation upon creation + // then we should simply log this as a warning. We should get an update action later down the road where we + // then take care of the rest. Hence, do not throw an error here to avoid rescheduling. Even though not + // officially supported, think of someone creating a CloudPrivateIPConfig object manually which will never + // get the annotation. + klog.Warningf("CloudPrivateIPConfig object %q is missing the egress IP owner reference annotation. Skipping", + oldCloudPrivateIPConfig.Name) + return nil + } + egressIP, err := eIPC.kube.GetEgressIP(egressIPName) + if err != nil { + return err + } + egressIPString := cloudPrivateIPConfigNameToIPString(newCloudPrivateIPConfig.Name) + statusItem := egressipv1.EgressIPStatusItem{ + Node: newCloudPrivateIPConfig.Status.Node, + EgressIP: egressIPString, + } + // Guard against performing the same assignment twice, which might + // happen when multiple updates come in on the same object. + hasStatus := false + for _, status := range egressIP.Status.Items { + if reflect.DeepEqual(status, statusItem) { + hasStatus = true + break + } + } + if !hasStatus { + statusToKeep := append(egressIP.Status.Items, statusItem) + if err := eIPC.patchReplaceEgressIPStatus(egressIP.Name, statusToKeep); err != nil { + return err + } + } + + eIPC.pendingCloudPrivateIPConfigsMutex.Lock() + defer eIPC.pendingCloudPrivateIPConfigsMutex.Unlock() + // Remove the finished add / update operation from the pending cache. We + // never process add and deletes in the same sync, and for updates: + // deletes are always performed before adds, hence we should only ever + // fully delete the item from the pending cache once the add has + // finished. 
+ ops, pending := eIPC.pendingCloudPrivateIPConfigsOps[egressIPName]
+ if !pending {
+ // Do not return an error here: it will lead to spurious error
+ // messages on restart because we will process a bunch of adds for
+ // all existing objects, for which no CR was issued.
+ klog.V(5).Infof("No pending operation found for EgressIP: %s while processing created CloudPrivateIPConfig", egressIPName)
+ return nil
+ }
+ op, exists := ops[egressIPString]
+ if !exists {
+ klog.V(5).Infof("Pending operations found for EgressIP: %s, but not for the created CloudPrivateIPConfig: %s", egressIPName, egressIPString)
+ return nil
+ }
+ // Process finalized add / updates, hence: (op.toAdd != "" &&
+ // op.toDelete != "") || (op.toAdd != "" && op.toDelete == ""), which is
+ // equivalent to the below.
+ if op.toAdd != "" {
+ delete(ops, egressIPString)
+ }
+ if len(ops) == 0 {
+ delete(eIPC.pendingCloudPrivateIPConfigsOps, egressIPName)
+ }
+ }
+ return nil
+}
+
+// cloudPrivateIPConfigNameToIPString converts the resource name to the string
+// representation of net.IP. Given a limitation in the Kubernetes API server
+// (see: https://github.com/kubernetes/kubernetes/pull/100950)
+// CloudPrivateIPConfig.metadata.name cannot represent an IPv6 address. To
+// work-around this limitation it was decided that the network plugin creating
+// the CR will fully expand the IPv6 address and replace all colons with dots,
+// ex:
+
+// The CloudPrivateIPConfig name fc00.f853.0ccd.e793.0000.0000.0000.0054 will be
+// represented as address: fc00:f853:ccd:e793::54
+
+// We thus need to replace every fifth character's dot with a colon.
+func cloudPrivateIPConfigNameToIPString(name string) string {
+ // Handle IPv4, which will work fine.
+ if ip := net.ParseIP(name); ip != nil {
+ return name
+ }
+ // Handle IPv6, for which we want to convert the fully expanded "special
+ // name" to go's default IP representation
+ name = strings.ReplaceAll(name, ".", ":")
+ return net.ParseIP(name).String()
+}
+
+// removePendingOpsAndGetResyncs removes the existing pending CloudPrivateIPConfig
+// operations from the cache and returns the EgressIP objects which can be
+// re-synced given the new assignment possibilities.
+func (eIPC *egressIPClusterController) removePendingOpsAndGetResyncs(egressIPName, egressIP string) ([]egressipv1.EgressIP, error) {
+ eIPC.pendingCloudPrivateIPConfigsMutex.Lock()
+ defer eIPC.pendingCloudPrivateIPConfigsMutex.Unlock()
+ ops, pending := eIPC.pendingCloudPrivateIPConfigsOps[egressIPName]
+ if !pending {
+ return nil, fmt.Errorf("no pending operation found for EgressIP: %s", egressIPName)
+ }
+ op, exists := ops[egressIP]
+ if !exists {
+ return nil, fmt.Errorf("pending operations found for EgressIP: %s, but not for the finalized IP: %s", egressIPName, egressIP)
+ }
+ // Make sure we are dealing with a delete operation, since for update
+ // operations we will still need to process the add afterwards.
+ if op.toAdd == "" && op.toDelete != "" {
+ delete(ops, egressIP)
+ }
+ if len(ops) == 0 {
+ delete(eIPC.pendingCloudPrivateIPConfigsOps, egressIPName)
+ }
+
+ // Some EgressIP objects might not have all of their spec.egressIPs
+ // assigned because there was no room to assign them. Hence, every time
+ // we process a final deletion for a CloudPrivateIPConfig: have a look
+ // at what other EgressIP objects have something un-assigned, and force
+ // a reconciliation on them by sending a synthetic update.
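+ // Illustrative example (not from this change): EgressIP "a" just released
+ // 10.0.0.5, and EgressIP "b" wants three IPs but only has two assigned and
+ // nothing pending; "b" is returned here so a synthetic update can retry
+ // its third assignment against the freed capacity.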
+ egressIPs, err := eIPC.kube.GetEgressIPs()
+ if err != nil {
+ return nil, fmt.Errorf("unable to list EgressIPs, err: %v", err)
+ }
+ resyncs := make([]egressipv1.EgressIP, 0, len(egressIPs.Items))
+ for _, egressIP := range egressIPs.Items {
+ // Do not process the egress IP object which owns the
+ // CloudPrivateIPConfig whose deletion we are currently
+ // processing.
+ if egressIP.Name == egressIPName {
+ continue
+ }
+ unassigned := len(egressIP.Spec.EgressIPs) - len(egressIP.Status.Items)
+ ops, pending := eIPC.pendingCloudPrivateIPConfigsOps[egressIP.Name]
+ // If the EgressIP was never added to the pending cache to begin
+ // with, but has un-assigned egress IPs, try it.
+ if !pending && unassigned > 0 {
+ resyncs = append(resyncs, egressIP)
+ continue
+ }
+ // If the EgressIP has pending operations, check whether the
+ // unassigned operations supersede the pending ones. It could be
+ // that only a couple of assignments could be executed at one point.
+ if pending && unassigned > len(ops) {
+ resyncs = append(resyncs, egressIP)
+ }
+ }
+ return resyncs, nil
+}
diff --git a/go-controller/pkg/clustermanager/egressip_controller_test.go b/go-controller/pkg/clustermanager/egressip_controller_test.go
new file mode 100644
index 0000000000..88f66e2bfe
--- /dev/null
+++ b/go-controller/pkg/clustermanager/egressip_controller_test.go
@@ -0,0 +1,2349 @@
+package clustermanager
+
+import (
+ "context"
+ "fmt"
+ "net"
+ "time"
+
+ "github.com/onsi/ginkgo"
+ "github.com/onsi/gomega"
+ "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config"
+ egressipv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1"
+ "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/healthcheck"
+ "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util"
+ "github.com/urfave/cli/v2"
+ v1 "k8s.io/api/core/v1"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ k8stypes "k8s.io/apimachinery/pkg/types"
+ "k8s.io/apimachinery/pkg/util/sets"
+ utilnet "k8s.io/utils/net"
+)
+
+type fakeEgressIPDialer struct{}
+
+func (f fakeEgressIPDialer) dial(ip net.IP, timeout time.Duration) bool {
+ return true
+}
+
+type fakeEgressIPHealthClient struct {
+ Connected bool
+ ProbeCount int
+ FakeProbeFailure bool
+}
+
+func (fehc *fakeEgressIPHealthClient) IsConnected() bool {
+ return fehc.Connected
+}
+
+func (fehc *fakeEgressIPHealthClient) Connect(dialCtx context.Context, mgmtIPs []net.IP, healthCheckPort int) bool {
+ if fehc.FakeProbeFailure {
+ return false
+ }
+ fehc.Connected = true
+ return true
+}
+
+func (fehc *fakeEgressIPHealthClient) Disconnect() {
+ fehc.Connected = false
+ fehc.ProbeCount = 0
+}
+
+func (fehc *fakeEgressIPHealthClient) Probe(dialCtx context.Context) bool {
+ if fehc.Connected && !fehc.FakeProbeFailure {
+ fehc.ProbeCount++
+ return true
+ }
+ return false
+}
+
+type fakeEgressIPHealthClientAllocator struct{}
+
+func (f *fakeEgressIPHealthClientAllocator) allocate(nodeName string) healthcheck.EgressIPHealthClient {
+ return &fakeEgressIPHealthClient{}
+}
+
+func newNamespaceMeta(namespace string, additionalLabels map[string]string) metav1.ObjectMeta {
+ labels := map[string]string{
+ "name": namespace,
+ }
+ for k, v := range additionalLabels {
+ labels[k] = v
+ }
+ return metav1.ObjectMeta{
+ UID: k8stypes.UID(namespace),
+ Name: namespace,
+ Labels: labels,
+ Annotations: map[string]string{},
+ }
+}
+
+func newNamespace(namespace string) *v1.Namespace {
+ return &v1.Namespace{
+ ObjectMeta: newNamespaceMeta(namespace, nil),
+ Spec: v1.NamespaceSpec{},
+ Status:
v1.NamespaceStatus{}, + } +} + +var egressPodLabel = map[string]string{"egress": "needed"} + +func newEgressIPMeta(name string) metav1.ObjectMeta { + return metav1.ObjectMeta{ + UID: k8stypes.UID(name), + Name: name, + Labels: map[string]string{ + "name": name, + }, + } +} + +func setupNode(nodeName string, ipNets []string, mockAllocationIPs map[string]string) egressNode { + var v4IP, v6IP net.IP + var v4Subnet, v6Subnet *net.IPNet + for _, ipNet := range ipNets { + ip, net, _ := net.ParseCIDR(ipNet) + if utilnet.IsIPv6CIDR(net) { + v6Subnet = net + v6IP = ip + } else { + v4Subnet = net + v4IP = ip + } + } + + mockAllcations := map[string]string{} + for mockAllocationIP, egressIPName := range mockAllocationIPs { + mockAllcations[net.ParseIP(mockAllocationIP).String()] = egressIPName + } + + node := egressNode{ + egressIPConfig: &util.ParsedNodeEgressIPConfiguration{ + V4: util.ParsedIFAddr{ + IP: v4IP, + Net: v4Subnet, + }, + V6: util.ParsedIFAddr{ + IP: v6IP, + Net: v6Subnet, + }, + Capacity: util.Capacity{ + IP: util.UnlimitedNodeCapacity, + IPv4: util.UnlimitedNodeCapacity, + IPv6: util.UnlimitedNodeCapacity, + }, + }, + allocations: mockAllcations, + healthClient: hccAllocator.allocate(nodeName), // using fakeEgressIPHealthClientAllocator + name: nodeName, + isReady: true, + isReachable: true, + isEgressAssignable: true, + } + return node +} + +var _ = ginkgo.Describe("OVN cluster-manager EgressIP Operations", func() { + var ( + app *cli.App + fakeClusterManagerOVN *FakeClusterManager + ) + + const ( + node1Name = "node1" + node2Name = "node2" + egressIPName = "egressip" + egressIPName2 = "egressip-2" + namespace = "egressip-namespace" + v4NodeSubnet = "10.128.0.0/24" + v6NodeSubnet = "ae70::66/64" + ) + + dialer = fakeEgressIPDialer{} + hccAllocator = &fakeEgressIPHealthClientAllocator{} + + getEgressIPAllocatorSizeSafely := func() int { + fakeClusterManagerOVN.eIPC.allocator.Lock() + defer fakeClusterManagerOVN.eIPC.allocator.Unlock() + return len(fakeClusterManagerOVN.eIPC.allocator.cache) + } + + getEgressIPStatusLen := func(egressIPName string) func() int { + return func() int { + tmp, err := fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), egressIPName, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return len(tmp.Status.Items) + } + } + + getEgressIPStatus := func(egressIPName string) ([]string, []string) { + tmp, err := fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), egressIPName, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + var egressIPs, nodes []string + for _, status := range tmp.Status.Items { + egressIPs = append(egressIPs, status.EgressIP) + nodes = append(nodes, status.Node) + } + return egressIPs, nodes + } + + getEgressIPReassignmentCount := func() int { + reAssignmentCount := 0 + egressIPs, err := fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().List(context.TODO(), metav1.ListOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + for _, egressIP := range egressIPs.Items { + if len(egressIP.Spec.EgressIPs) != len(egressIP.Status.Items) { + reAssignmentCount++ + } + } + return reAssignmentCount + } + + isEgressAssignableNode := func(nodeName string) func() bool { + return func() bool { + fakeClusterManagerOVN.eIPC.allocator.Lock() + defer fakeClusterManagerOVN.eIPC.allocator.Unlock() + if item, exists := fakeClusterManagerOVN.eIPC.allocator.cache[nodeName]; exists { + return item.isEgressAssignable + } + return 
false + } + } + + nodeSwitch := func() string { + _, nodes := getEgressIPStatus(egressIPName) + if len(nodes) != 1 { + return "" + } + return nodes[0] + } + + ginkgo.BeforeEach(func() { + // Restore global default values before each testcase + gomega.Expect(config.PrepareTestConfig()).To(gomega.Succeed()) + config.OVNKubernetesFeature.EnableEgressIP = true + config.OVNKubernetesFeature.EgressIPNodeHealthCheckPort = 1234 + + app = cli.NewApp() + app.Name = "test" + app.Flags = config.Flags + fakeClusterManagerOVN = NewFakeClusterManagerOVN() + }) + + ginkgo.AfterEach(func() { + fakeClusterManagerOVN.shutdown() + }) + + ginkgo.Context("On node ADD/UPDATE/DELETE", func() { + ginkgo.It("should re-assign EgressIPs and perform proper egressIP allocation changes", func() { + app.Action = func(ctx *cli.Context) error { + egressIP := "192.168.126.101" + node1IPv4 := "192.168.126.202/24" + node2IPv4 := "192.168.126.51/24" + egressNamespace := newNamespace(namespace) + node1 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node1Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + }, + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + node2 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node2Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + fakeClusterManagerOVN.start( + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1, node2}, + }, + ) + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) + gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) + gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeFalse()) + + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + node1.Labels = map[string]string{} + node2.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + + _, err = 
fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + gomega.Eventually(nodeSwitch).Should(gomega.Equal(node2.Name)) + egressIPs, _ = getEgressIPStatus(egressIPName) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + return nil + } + + err := app.Run([]string{ + app.Name, + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should re-assign EgressIPs and perform proper egressIP allocation changes during node deletion", func() { + app.Action = func(ctx *cli.Context) error { + + egressIP := "192.168.126.101" + node1IPv4 := "192.168.126.202/24" + node2IPv4 := "192.168.126.51/24" + + egressNamespace := newNamespace(namespace) + + node1 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node1Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + }, + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + node2 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node2Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + + fakeClusterManagerOVN.start( + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1, node2}, + }, + ) + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) + gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) + gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeFalse()) + + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + node2.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + _, err = 
fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Delete(context.TODO(), node1Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + gomega.Eventually(nodeSwitch).Should(gomega.Equal(node2.Name)) + egressIPs, _ = getEgressIPStatus(egressIPName) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + }) + + ginkgo.Context("WatchEgressNodes", func() { + + ginkgo.It("should populated egress node data as they are tagged `egress assignable` with variants of IPv4/IPv6", func() { + app.Action = func(ctx *cli.Context) error { + + node1IPv4 := "192.168.128.202/24" + node1IPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" + node2IPv4 := "192.168.126.51/24" + + node1 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, node1IPv6), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + node2 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node2", + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + fakeClusterManagerOVN.start( + &v1.NodeList{ + Items: []v1.Node{}, + }, + ) + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(0)) + + node1.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + + _, ip1V4Sub, err := net.ParseCIDR(node1IPv4) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, ip1V6Sub, err := net.ParseCIDR(node1IPv6) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, ip2V4Sub, err := net.ParseCIDR(node2IPv4) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node1, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache[node1.Name].egressIPConfig.V4.Net).To(gomega.Equal(ip1V4Sub)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache[node1.Name].egressIPConfig.V6.Net).To(gomega.Equal(ip1V6Sub)) + + node2.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + + _, err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node2, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) + 
gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache[node2.Name].egressIPConfig.V4.Net).To(gomega.Equal(ip2V4Sub)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache[node1.Name].egressIPConfig.V4.Net).To(gomega.Equal(ip1V4Sub)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache[node1.Name].egressIPConfig.V6.Net).To(gomega.Equal(ip1V6Sub)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("using retry to create egress node with forced error followed by an update", func() { + app.Action = func(ctx *cli.Context) error { + nodeIPv4 := "192.168.126.51/24" + nodeIPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" + node := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node", + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", nodeIPv4, nodeIPv6), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + fakeClusterManagerOVN.start( + &v1.NodeList{ + Items: []v1.Node{}, + }, + ) + _, err := fakeClusterManagerOVN.eIPC.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(0)) + + _, ipV4Sub, err := net.ParseCIDR(nodeIPv4) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, ipV6Sub, err := net.ParseCIDR(nodeIPv6) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + node.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + _, err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + node.Labels = map[string]string{} + _, err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node.Name)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache[node.Name].egressIPConfig.V4.Net).To(gomega.Equal(ipV4Sub)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache[node.Name].egressIPConfig.V6.Net).To(gomega.Equal(ipV6Sub)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("ensure only one egressIP is assinged to the given node while rest of the IPs go into pending state", func() { + app.Action = func(ctx *cli.Context) error { + + config.Gateway.DisableSNATMultipleGWs = true + + egressIP1 := "192.168.126.25" + egressIP2 := "192.168.126.30" + egressIP3 := "192.168.126.35" + node1IPv4 := "192.168.126.12/24" + node2IPv4 := "192.168.126.13/24" + + egressNamespace := newNamespace(namespace) + + node1 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node1Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/l3-gateway-config": 
`{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, + "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", + }, + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + + node2 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node2Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node2IPv4), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:50", "ip-address":"192.168.126.13/24", "next-hop":"192.168.126.1"}}`, + "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + + eIP1 := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP1, egressIP2}, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + + eIP2 := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName2), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP3}, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + + fakeClusterManagerOVN.start( + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP1, eIP2}, + }, + &v1.NodeList{ + Items: []v1.Node{node1, node2}, + }, + ) + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Ensure first egressIP object is assigned, since only node1 is an egressNode, only 1IP will be assigned, other will be pending + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(1)) + recordedEvent := <-fakeClusterManagerOVN.fakeRecorder.Events + gomega.Expect(recordedEvent).To(gomega.ContainSubstring("Not all egress IPs for EgressIP: %s could be assigned, please tag more nodes", eIP1.Name)) + egressIPs1, nodes1 := getEgressIPStatus(egressIPName) + gomega.Expect(nodes1[0]).To(gomega.Equal(node1.Name)) + possibleAssignments := sets.NewString(egressIP1, egressIP2) + gomega.Expect(possibleAssignments.Has(egressIPs1[0])).To(gomega.BeTrue()) + + // Ensure second egressIP object is also assigned to node1, but no OVN config will be done for this + gomega.Eventually(getEgressIPStatusLen(egressIPName2)).Should(gomega.Equal(1)) + egressIPs2, nodes2 := getEgressIPStatus(egressIPName2) + gomega.Expect(nodes2[0]).To(gomega.Equal(node1.Name)) + gomega.Expect(egressIPs2[0]).To(gomega.Equal(egressIP3)) + + // Make second node egressIP assignable + node2.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } 
+ _, err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // ensure secondIP from first object gets assigned to node2 + gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeTrue()) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(2)) + egressIPs1, nodes1 = getEgressIPStatus(egressIPName) + gomega.Expect(nodes1[1]).To(gomega.Equal(node2.Name)) + gomega.Expect(possibleAssignments.Has(egressIPs1[1])).To(gomega.BeTrue()) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should skip populating egress node data for nodes that have incorrect IP address", func() { + app.Action = func(ctx *cli.Context) error { + config.OVNKubernetesFeature.EnableInterconnect = true // no impact on global eIPC functions + nodeIPv4 := "192.168.126.510/24" + nodeIPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" + node := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node1Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", nodeIPv4, nodeIPv6), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + fakeClusterManagerOVN.start( + &v1.NodeList{ + Items: []v1.Node{node}, + }, + ) + + allocatorItems := func() int { + return len(fakeClusterManagerOVN.eIPC.allocator.cache) + } + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(allocatorItems).Should(gomega.Equal(0)) + + node.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + + _, err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(allocatorItems).Should(gomega.Equal(0)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should probe nodes using grpc", func() { + app.Action = func(ctx *cli.Context) error { + config.OVNKubernetesFeature.EnableInterconnect = false // no impact on global eIPC functions + node1IPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" + node2IPv4 := "192.168.126.51/24" + + node1 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", "", node1IPv6), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v6NodeSubnet), + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + node2 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node2", + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + 
}
+			fakeClusterManagerOVN.start()
+			_, err := fakeClusterManagerOVN.eIPC.WatchEgressNodes()
+			gomega.Expect(err).NotTo(gomega.HaveOccurred())
+			gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(0))
+
+			_, ip1V6Sub, err := net.ParseCIDR(node1IPv6)
+			gomega.Expect(err).NotTo(gomega.HaveOccurred())
+			_, ip2V4Sub, err := net.ParseCIDR(node2IPv4)
+			gomega.Expect(err).NotTo(gomega.HaveOccurred())
+
+			_, err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node1, metav1.CreateOptions{})
+			gomega.Expect(err).NotTo(gomega.HaveOccurred())
+			gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1))
+			gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name))
+			gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache[node1.Name].egressIPConfig.V6.Net).To(gomega.Equal(ip1V6Sub))
+
+			_, err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node2, metav1.CreateOptions{})
+			gomega.Expect(err).NotTo(gomega.HaveOccurred())
+			gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2))
+			gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue())
+			gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeTrue())
+			gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name))
+			gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name))
+
+			cachedEgressNode1 := fakeClusterManagerOVN.eIPC.allocator.cache[node1.Name]
+			cachedEgressNode2 := fakeClusterManagerOVN.eIPC.allocator.cache[node2.Name]
+			gomega.Expect(cachedEgressNode1.egressIPConfig.V6.Net).To(gomega.Equal(ip1V6Sub))
+			gomega.Expect(cachedEgressNode2.egressIPConfig.V4.Net).To(gomega.Equal(ip2V4Sub))
+
+			// Explicitly call the reachability check so we do not need to wait for the slow periodic timer
+			checkEgressNodesReachabilityIterate(fakeClusterManagerOVN.eIPC)
+			gomega.Expect(cachedEgressNode1.isReachable).To(gomega.BeTrue())
+			gomega.Expect(cachedEgressNode2.isReachable).To(gomega.BeTrue())
+
+			// The test cases below manipulate the fakeEgressIPHealthClient used for mocking
+			// a gRPC session dedicated to monitoring each of the 2 nodes created. They do that
+			// by setting the probe fail boolean, which in turn causes the mocked probe call to
+			// pretend that the periodic monitor succeeded or not.
+			tests := []struct {
+				desc            string
+				node1FailProbes bool
+				node2FailProbes bool
+				// This optional function allows a test case to perform any special
+				// pre-conditioning needed before invoking
+				// checkEgressNodesReachabilityIterate in the test.
+ tcPrepareFunc func(hcc1, hcc2 *fakeEgressIPHealthClient) + }{ + { + desc: "disconnect nodes", + node1FailProbes: true, + node2FailProbes: true, + tcPrepareFunc: func(hcc1, hcc2 *fakeEgressIPHealthClient) { + hcc1.Disconnect() + hcc2.Disconnect() + }, + }, + { + desc: "connect node1", + node2FailProbes: true, + }, + { + desc: "node1 connected, connect node2", + }, + { + desc: "node1 and node2 connected, bump only node2 counters", + node1FailProbes: true, + }, + { + desc: "node2 connected, disconnect node1", + node1FailProbes: true, + node2FailProbes: true, + tcPrepareFunc: func(hcc1, hcc2 *fakeEgressIPHealthClient) { + hcc1.Disconnect() + }, + }, + { + desc: "connect node1, disconnect node2", + node2FailProbes: true, + tcPrepareFunc: func(hcc1, hcc2 *fakeEgressIPHealthClient) { + hcc2.Disconnect() + }, + }, + { + desc: "node1 and node2 connected and both counters bump", + tcPrepareFunc: func(hcc1, hcc2 *fakeEgressIPHealthClient) { + // Perform an additional iteration, to make probe counters to bump on second call + checkEgressNodesReachabilityIterate(fakeClusterManagerOVN.eIPC) + }, + }, + } + + // hcc1 and hcc2 are the mocked gRPC client to node1 and node2, respectively. + // They are what we use to manipulate whether probes to the node should fail or + // not, as well as a mechanism for explicitly disconnecting as part of the test. + hcc1 := cachedEgressNode1.healthClient.(*fakeEgressIPHealthClient) + hcc2 := cachedEgressNode2.healthClient.(*fakeEgressIPHealthClient) + + // ttIterCheck is the common function used by each test case. It will check whether + // a client changed its connection state and if the number of probes to the node + // changed as expected. + ttIterCheck := func(hcc *fakeEgressIPHealthClient, prevNodeIsConnected bool, prevProbes int, failProbes bool, desc string) { + currNodeIsConnected := hcc.IsConnected() + gomega.Expect(currNodeIsConnected || failProbes).To(gomega.BeTrue(), desc) + + if !prevNodeIsConnected && !currNodeIsConnected { + // Not connected (before and after): no probes should be successful + gomega.Expect(hcc.ProbeCount).To(gomega.Equal(prevProbes), desc) + } else if prevNodeIsConnected && currNodeIsConnected { + if failProbes { + // Still connected, but no probes should be successful + gomega.Expect(prevProbes).To(gomega.Equal(hcc.ProbeCount), desc) + } else { + // Still connected and probe counters should be going up + gomega.Expect(prevProbes < hcc.ProbeCount).To(gomega.BeTrue(), desc) + } + } + } + + for _, tt := range tests { + hcc1.FakeProbeFailure = tt.node1FailProbes + hcc2.FakeProbeFailure = tt.node2FailProbes + + prevNode1IsConnected := hcc1.IsConnected() + prevNode2IsConnected := hcc2.IsConnected() + prevNode1Probes := hcc1.ProbeCount + prevNode2Probes := hcc2.ProbeCount + + if tt.tcPrepareFunc != nil { + tt.tcPrepareFunc(hcc1, hcc2) + } + + // Perform connect or probing, depending on the state of the connections + checkEgressNodesReachabilityIterate(fakeClusterManagerOVN.eIPC) + + ttIterCheck(hcc1, prevNode1IsConnected, prevNode1Probes, tt.node1FailProbes, tt.desc) + ttIterCheck(hcc2, prevNode2IsConnected, prevNode2Probes, tt.node2FailProbes, tt.desc) + } + + gomega.Expect(hcc1.IsConnected()).To(gomega.BeTrue()) + gomega.Expect(hcc2.IsConnected()).To(gomega.BeTrue()) + + // Lastly, remove egress assignable from node 2 and make sure it disconnects + node2.Labels = map[string]string{} + _, err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) + 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) + gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeFalse()) + + // Explicitly call check reachibility so we need not to wait for slow periodic timer + checkEgressNodesReachabilityIterate(fakeClusterManagerOVN.eIPC) + + gomega.Expect(hcc1.IsConnected()).To(gomega.BeTrue()) + gomega.Expect(hcc2.IsConnected()).To(gomega.BeFalse()) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) + + ginkgo.Context("WatchEgressNodes running with WatchEgressIP", func() { + ginkgo.It("should result in error and event if specified egress IP is a cluster node IP", func() { + app.Action = func(ctx *cli.Context) error { + + egressIP := "192.168.126.51" + node1IPv4 := "192.168.128.202/24" + node1IPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" + node2IPv4 := "192.168.126.51/24" + + node1 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node1Name, + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, node1IPv6), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + node2 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node2Name, + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + + fakeClusterManagerOVN.start( + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1, node2}, + }) + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) + + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(0)) + gomega.Eventually(fakeClusterManagerOVN.fakeRecorder.Events).Should(gomega.HaveLen(3)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should remove stale EgressIP setup when node label is removed while ovnkube-master is not running and assign to newly labelled node", func() { + app.Action = func(ctx *cli.Context) error { + + egressIP1 := "192.168.126.25" + node1IPv4 := "192.168.126.51/24" + + egressNamespace := newNamespace(namespace) + + node1 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: 
node1Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + node2 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node2Name, + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP1}, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{ + { + Node: node1.Name, + EgressIP: egressIP1, + }, + }, + }, + } + + fakeClusterManagerOVN.start( + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1, node2}, + }, + ) + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should only get assigned EgressIPs which matches their subnet when the node is tagged", func() { + app.Action = func(ctx *cli.Context) error { + + egressIP := "192.168.126.101" + node1IPv4 := "192.168.128.202/24" + node1IPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" + node2IPv4 := "192.168.126.51/24" + + node1 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node1Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, node1IPv6), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + node2 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node2Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + + fakeClusterManagerOVN.start( + &egressipv1.EgressIPList{ + Items: 
[]egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1, node2}, + }) + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, ip1V4Sub, err := net.ParseCIDR(node1IPv4) + _, ip1V6Sub, err := net.ParseCIDR(node1IPv6) + _, ip2V4Sub, err := net.ParseCIDR(node2IPv4) + + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) + gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeFalse()) + gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeFalse()) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache[node1.Name].egressIPConfig.V4.Net).To(gomega.Equal(ip1V4Sub)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache[node1.Name].egressIPConfig.V6.Net).To(gomega.Equal(ip1V6Sub)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache[node2.Name].egressIPConfig.V4.Net).To(gomega.Equal(ip2V4Sub)) + gomega.Eventually(eIP.Status.Items).Should(gomega.HaveLen(0)) + + node1.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + + _, err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(0)) + gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) + + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(1)) + + node2.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + + _, err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node2.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should re-balance EgressIPs when their node is removed", func() { + app.Action = func(ctx *cli.Context) error { + config.OVNKubernetesFeature.EnableInterconnect = true // no impact on global eIPC functions + egressIP := "192.168.126.101" + node1IPv4 := "192.168.126.12/24" + node1IPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" + node2IPv4 := "192.168.126.51/24" + + node1 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node1Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, node1IPv6), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), + }, + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + node2 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node2Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": 
fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + }, + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + + fakeClusterManagerOVN.start( + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1}, + }) + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + _, err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node2, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes = getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) + + err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Delete(context.TODO(), node1.Name, *metav1.NewDeleteOptions(0)) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).ToNot(gomega.HaveKey(node1.Name)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + + getNewNode := func() string { + _, nodes = getEgressIPStatus(egressIPName) + if len(nodes) > 0 { + return nodes[0] + } + return "" + } + + gomega.Eventually(getNewNode).Should(gomega.Equal(node2.Name)) + egressIPs, _ = getEgressIPStatus(egressIPName) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("egress node update should not mark the node as reachable if there was no label/readiness change", func() { + // When an egress node becomes reachable during a node update event and there is no changes to node labels/readiness + // unassigned egress IP should be eventually added by the periodic reachability check. 
+ // Test steps: + // - disable periodic check from running in background, so it can be called directly from the test + // - assign egress IP to an available node + // - make the node unreachable and verify that the egress IP was unassigned + // - make the node reachable and update a node + // - verify that the egress IP was assigned by calling the periodic reachability check + app.Action = func(ctx *cli.Context) error { + config.OVNKubernetesFeature.EnableInterconnect = true // no impact on global eIPC functions + egressIP := "192.168.126.101" + nodeIPv4 := "192.168.126.51/24" + node := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node1Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", nodeIPv4), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\"]}", v4NodeSubnet), + }, + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + eIP1 := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + }, + } + fakeClusterManagerOVN.start( + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP1}, + }, + &v1.NodeList{ + Items: []v1.Node{node}, + }, + ) + + // Virtually disable background reachability check by using a huge interval + fakeClusterManagerOVN.eIPC.reachabilityCheckInterval = time.Hour + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPStatusLen(eIP1.Name)).Should(gomega.Equal(1)) + egressIPs, _ := getEgressIPStatus(eIP1.Name) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + hcClient := fakeClusterManagerOVN.eIPC.allocator.cache[node.Name].healthClient.(*fakeEgressIPHealthClient) + hcClient.FakeProbeFailure = true + // explicitly call check reachability, periodic checker is not active + checkEgressNodesReachabilityIterate(fakeClusterManagerOVN.eIPC) + gomega.Eventually(getEgressIPStatusLen(eIP1.Name)).Should(gomega.Equal(0)) + + hcClient.FakeProbeFailure = false + node.Annotations["test"] = "dummy" + _, err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(hcClient.IsConnected()).Should(gomega.Equal(true)) + // the node should not be marked as reachable in the update handler as it is not getting added + gomega.Consistently(func() bool { return fakeClusterManagerOVN.eIPC.allocator.cache[node.Name].isReachable }).Should(gomega.Equal(false)) + + // egress IP should get assigned on the next checkEgressNodesReachabilityIterate call + // explicitly call check reachability, periodic checker is not active + checkEgressNodesReachabilityIterate(fakeClusterManagerOVN.eIPC) + gomega.Eventually(getEgressIPStatusLen(eIP1.Name)).Should(gomega.Equal(1)) + + return nil + } + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) + + ginkgo.Context("IPv6 assignment", func() { + + ginkgo.It("should be able to allocate non-conflicting IP on node with lowest amount of allocations", func() { + app.Action = func(ctx *cli.Context) error { + + fakeClusterManagerOVN.start() + + egressIP := "0:0:0:0:0:feff:c0a8:8e0f" + node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, 
map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) + node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + }, + } + assignedStatuses := fakeClusterManagerOVN.eIPC.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) + gomega.Expect(assignedStatuses).To(gomega.HaveLen(1)) + gomega.Expect(assignedStatuses[0].Node).To(gomega.Equal(node2.name)) + gomega.Expect(assignedStatuses[0].EgressIP).To(gomega.Equal(net.ParseIP(egressIP).String())) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should be able to allocate several EgressIPs and avoid the same node", func() { + app.Action = func(ctx *cli.Context) error { + + fakeClusterManagerOVN.start() + + egressIP1 := "0:0:0:0:0:feff:c0a8:8e0d" + egressIP2 := "0:0:0:0:0:feff:c0a8:8e0f" + node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) + node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP1, egressIP2}, + }, + } + assignedStatuses := fakeClusterManagerOVN.eIPC.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) + gomega.Expect(assignedStatuses).To(gomega.HaveLen(2)) + gomega.Expect(assignedStatuses[0].Node).To(gomega.Equal(node2.name)) + gomega.Expect(assignedStatuses[0].EgressIP).To(gomega.Equal(net.ParseIP(egressIP1).String())) + gomega.Expect(assignedStatuses[1].Node).To(gomega.Equal(node1.name)) + gomega.Expect(assignedStatuses[1].EgressIP).To(gomega.Equal(net.ParseIP(egressIP2).String())) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should be able to allocate several EgressIPs and avoid the same node and leave one un-assigned without error", func() { + app.Action = func(ctx *cli.Context) error { + + fakeClusterManagerOVN.start() + + egressIP1 := "0:0:0:0:0:feff:c0a8:8e0d" + egressIP2 := "0:0:0:0:0:feff:c0a8:8e0e" + egressIP3 := "0:0:0:0:0:feff:c0a8:8e0f" + + node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) + node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP1, egressIP2, egressIP3}, + }, + } + assignedStatuses := fakeClusterManagerOVN.eIPC.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) + gomega.Expect(assignedStatuses).To(gomega.HaveLen(2)) + 
gomega.Expect(assignedStatuses[0].Node).To(gomega.Equal(node2.name)) + gomega.Expect(assignedStatuses[0].EgressIP).To(gomega.Equal(net.ParseIP(egressIP1).String())) + gomega.Expect(assignedStatuses[1].Node).To(gomega.Equal(node1.name)) + gomega.Expect(assignedStatuses[1].EgressIP).To(gomega.Equal(net.ParseIP(egressIP2).String())) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should return the already allocated IP with the same node if it is allocated again", func() { + app.Action = func(ctx *cli.Context) error { + + fakeClusterManagerOVN.start() + + egressIP := "0:0:0:0:0:feff:c0a8:8e32" + + node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{egressIP: egressIPName, "0:0:0:0:0:feff:c0a8:8e1e": "bogus1"}) + node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus2"}) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + egressIPs := []string{egressIP} + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: egressIPs, + }, + } + + assignedStatuses := fakeClusterManagerOVN.eIPC.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) + gomega.Expect(assignedStatuses).To(gomega.HaveLen(1)) + gomega.Expect(assignedStatuses[0].Node).To(gomega.Equal(node1Name)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should not be able to allocate node IP", func() { + app.Action = func(ctx *cli.Context) error { + + fakeClusterManagerOVN.start() + + egressIP := "0:0:0:0:0:feff:c0a8:8e0c" + + node1 := setupNode(node1Name, []string{egressIP + "/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) + node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + }, + } + assignedStatuses := fakeClusterManagerOVN.eIPC.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) + gomega.Expect(assignedStatuses).To(gomega.HaveLen(0)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should not be able to allocate conflicting compressed IP", func() { + app.Action = func(ctx *cli.Context) error { + + fakeClusterManagerOVN.start() + + egressIP := "::feff:c0a8:8e32" + + node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) + node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + egressIPs := []string{egressIP} + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: egressIPs, + }, + } + + assignedStatuses := fakeClusterManagerOVN.eIPC.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) + 
gomega.Expect(assignedStatuses).To(gomega.HaveLen(0)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should not be able to allocate IPv4 IP on nodes which can only host IPv6", func() { + app.Action = func(ctx *cli.Context) error { + + fakeClusterManagerOVN.start() + + egressIP := "192.168.126.16" + + node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) + node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + eIPs := []string{egressIP} + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: eIPs, + }, + } + + assignedStatuses := fakeClusterManagerOVN.eIPC.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) + gomega.Expect(assignedStatuses).To(gomega.HaveLen(0)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should be able to allocate non-conflicting compressed uppercase IP", func() { + app.Action = func(ctx *cli.Context) error { + + fakeClusterManagerOVN.start() + + egressIP := "::FEFF:C0A8:8D32" + + node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) + node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + }, + } + assignedStatuses := fakeClusterManagerOVN.eIPC.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) + gomega.Expect(assignedStatuses).To(gomega.HaveLen(1)) + gomega.Expect(assignedStatuses[0].Node).To(gomega.Equal(node2.name)) + gomega.Expect(assignedStatuses[0].EgressIP).To(gomega.Equal(net.ParseIP(egressIP).String())) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should not be able to allocate conflicting compressed uppercase IP", func() { + app.Action = func(ctx *cli.Context) error { + + fakeClusterManagerOVN.start() + + egressIP := "::FEFF:C0A8:8E32" + + node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) + node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + egressIPs := []string{egressIP} + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: egressIPs, + }, + } + + assignedStatuses := fakeClusterManagerOVN.eIPC.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) + gomega.Expect(assignedStatuses).To(gomega.HaveLen(0)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + 
ginkgo.It("should not be able to allocate invalid IP", func() { + app.Action = func(ctx *cli.Context) error { + + fakeClusterManagerOVN.start() + + egressIPs := []string{"0:0:0:0:0:feff:c0a8:8e32:5"} + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: egressIPs, + }, + } + + assignedStatuses, err := fakeClusterManagerOVN.eIPC.validateEgressIPSpec(eIP.Name, eIP.Spec.EgressIPs) + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.Equal(fmt.Sprintf("unable to parse provided EgressIP: %s, invalid", egressIPs[0]))) + gomega.Expect(assignedStatuses).To(gomega.HaveLen(0)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) + + ginkgo.Context("Dual-stack assignment", func() { + + ginkgo.It("should be able to allocate non-conflicting IPv4 on node which can host it, even if it happens to be the node with more assignments", func() { + app.Action = func(ctx *cli.Context) error { + + fakeClusterManagerOVN.start() + egressIP := "192.168.126.99" + + node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus1"}) + node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus1", "192.168.126.102": "bogus2"}) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + }, + } + assignedStatuses := fakeClusterManagerOVN.eIPC.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) + gomega.Expect(assignedStatuses).To(gomega.HaveLen(1)) + gomega.Expect(assignedStatuses[0].Node).To(gomega.Equal(node2.name)) + gomega.Expect(assignedStatuses[0].EgressIP).To(gomega.Equal(net.ParseIP(egressIP).String())) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + }) + + ginkgo.Context("IPv4 assignment", func() { + + ginkgo.It("Should not be able to assign egress IP defined in CIDR notation", func() { + app.Action = func(ctx *cli.Context) error { + + fakeClusterManagerOVN.start() + + egressIPs := []string{"192.168.126.99/32"} + + node1 := setupNode(node1Name, []string{"192.168.126.12/24"}, map[string]string{"192.168.126.102": "bogus1", "192.168.126.111": "bogus2"}) + node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus3"}) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: egressIPs, + }, + } + + validatedIPs, err := fakeClusterManagerOVN.eIPC.validateEgressIPSpec(eIP.Name, eIP.Spec.EgressIPs) + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.Equal(fmt.Sprintf("unable to parse provided EgressIP: %s, invalid", egressIPs[0]))) + gomega.Expect(validatedIPs).To(gomega.HaveLen(0)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + }) + + ginkgo.Context("WatchEgressIP", func() { + + ginkgo.It("should update status correctly for single-stack IPv4", func() { + app.Action = func(ctx *cli.Context) error { + fakeClusterManagerOVN.start() + + 
egressIP := "192.168.126.10" + node1 := setupNode(node1Name, []string{"192.168.126.12/24"}, map[string]string{"192.168.126.102": "bogus1", "192.168.126.111": "bogus2"}) + node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus3"}) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": "does-not-exist", + }, + }, + }, + } + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, err = fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should update status correctly for single-stack IPv6", func() { + app.Action = func(ctx *cli.Context) error { + fakeClusterManagerOVN.start() + + egressIP := "0:0:0:0:0:feff:c0a8:8e0d" + + node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) + node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + }, + } + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, err = fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(net.ParseIP(egressIP).String())) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should update status correctly for dual-stack", func() { + app.Action = func(ctx *cli.Context) error { + fakeClusterManagerOVN.start() + + egressIPv4 := "192.168.126.101" + egressIPv6 := "0:0:0:0:0:feff:c0a8:8e0d" + + node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus1"}) + node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus2", "192.168.126.102": "bogus3"}) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: 
[]string{egressIPv4, egressIPv6}, + }, + } + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, err = fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(2)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes).To(gomega.ConsistOf(node2.name, node1.name)) + gomega.Expect(egressIPs).To(gomega.ConsistOf(net.ParseIP(egressIPv6).String(), net.ParseIP(egressIPv4).String())) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) + + ginkgo.Context("syncEgressIP for dual-stack", func() { + + ginkgo.It("should not update valid assignments", func() { + app.Action = func(ctx *cli.Context) error { + + egressIPv4 := "192.168.126.101" + egressIPv6 := "0:0:0:0:0:feff:c0a8:8e0d" + + node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{}) + node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.102": "bogus3"}) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIPv4, egressIPv6}, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{ + { + EgressIP: egressIPv4, + Node: node2.name, + }, + { + EgressIP: net.ParseIP(egressIPv6).String(), + Node: node1.name, + }, + }, + }, + } + + fakeClusterManagerOVN.start( + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + ) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(2)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes).To(gomega.ConsistOf(eIP.Status.Items[0].Node, eIP.Status.Items[1].Node)) + gomega.Expect(egressIPs).To(gomega.ConsistOf(eIP.Status.Items[0].EgressIP, eIP.Status.Items[1].EgressIP)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) + + ginkgo.Context("syncEgressIP for IPv4", func() { + + ginkgo.It("should update invalid assignments on duplicated node", func() { + app.Action = func(ctx *cli.Context) error { + + egressIP1 := "192.168.126.101" + egressIP2 := "192.168.126.100" + + node1 := setupNode(node1Name, []string{"192.168.126.12/24"}, map[string]string{egressIP1: egressIPName, egressIP2: egressIPName}) + node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus3"}) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP1, egressIP2}, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{ + { + EgressIP: egressIP1, + Node: node1.name, + }, + { + EgressIP: egressIP2, + Node: node1.name, + }, + }, + }, + } + fakeClusterManagerOVN.start( + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + ) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressIP() 
+ gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(2)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes).To(gomega.ConsistOf(node1.name, node2.name)) + gomega.Expect(egressIPs).To(gomega.ConsistOf(eIP.Status.Items[0].EgressIP, eIP.Status.Items[1].EgressIP)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should update invalid assignments with incorrectly parsed IP", func() { + app.Action = func(ctx *cli.Context) error { + + egressIP1 := "192.168.126.101" + egressIPIncorrect := "192.168.126.1000" + + node1 := setupNode(node1Name, []string{"192.168.126.12/24"}, map[string]string{"192.168.126.102": "bogus1", "192.168.126.111": "bogus2"}) + node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus3"}) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP1}, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{ + { + EgressIP: egressIPIncorrect, + Node: node1.name, + }, + }, + }, + } + + fakeClusterManagerOVN.start( + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + ) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP1)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should update invalid assignments with unhostable IP on a node", func() { + app.Action = func(ctx *cli.Context) error { + + egressIP1 := "192.168.126.101" + egressIPIncorrect := "192.168.128.100" + + node1 := setupNode(node1Name, []string{"192.168.126.12/24"}, map[string]string{"192.168.126.102": "bogus1", "192.168.126.111": "bogus2"}) + node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus3"}) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP1}, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{ + { + EgressIP: egressIPIncorrect, + Node: node1.name, + }, + }, + }, + } + + fakeClusterManagerOVN.start( + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + ) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP1)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should not update valid assignment", func() { + app.Action = func(ctx *cli.Context) error { + + egressIP1 := 
"192.168.126.101" + + node1 := setupNode(node1Name, []string{"192.168.126.12/24"}, map[string]string{"192.168.126.111": "bogus2"}) + node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus3"}) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP1}, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{ + { + EgressIP: egressIP1, + Node: node1.name, + }, + }, + }, + } + + fakeClusterManagerOVN.start( + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + ) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP1)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) + + ginkgo.Context("AddEgressIP for IPv4", func() { + + ginkgo.It("should not create two EgressIPs with same egress IP value", func() { + app.Action = func(ctx *cli.Context) error { + egressIP1 := "192.168.126.101" + + node1 := setupNode(node1Name, []string{"192.168.126.12/24"}, map[string]string{"192.168.126.102": "bogus1", "192.168.126.111": "bogus2"}) + node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus3"}) + + eIP1 := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta("egressip"), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP1}, + }, + } + eIP2 := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta("egressip2"), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP1}, + }, + } + + fakeClusterManagerOVN.start() + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, err = fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP1, metav1.CreateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPStatusLen(eIP1.Name)).Should(gomega.Equal(1)) + egressIPs, nodes := getEgressIPStatus(eIP1.Name) + gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP1)) + + _, err = fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP2, metav1.CreateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPStatusLen(eIP2.Name)).Should(gomega.Equal(0)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + }) + + ginkgo.Context("UpdateEgressIP for IPv4", func() { + + ginkgo.It("should perform re-assingment of EgressIPs", func() { + app.Action = func(ctx *cli.Context) error { + + egressIP := "192.168.126.101" + updateEgressIP := "192.168.126.10" + + node1 := setupNode(node1Name, []string{"192.168.126.41/24"}, map[string]string{"192.168.126.102": "bogus1", "192.168.126.111": "bogus2"}) + node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, 
map[string]string{"192.168.126.68": "bogus3"}) + + eIP1 := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + }, + } + fakeClusterManagerOVN.start() + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + _, err := fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, err = fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP1, metav1.CreateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + eIPToUpdate, err := fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP1.Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + eIPToUpdate.Spec.EgressIPs = []string{updateEgressIP} + + _, err = fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().Update(context.TODO(), eIPToUpdate, metav1.UpdateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + getEgressIP := func() string { + egressIPs, _ = getEgressIPStatus(egressIPName) + if len(egressIPs) == 0 { + return "try again" + } + return egressIPs[0] + } + + gomega.Eventually(getEgressIP).Should(gomega.Equal(updateEgressIP)) + _, nodes = getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) +}) diff --git a/go-controller/pkg/clustermanager/egressip_event_handler.go b/go-controller/pkg/clustermanager/egressip_event_handler.go new file mode 100644 index 0000000000..e1ff8142fb --- /dev/null +++ b/go-controller/pkg/clustermanager/egressip_event_handler.go @@ -0,0 +1,263 @@ +package clustermanager + +import ( + "fmt" + "reflect" + + ocpcloudnetworkapi "github.com/openshift/api/cloudnetwork/v1" + egressipv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" + objretry "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/retry" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + v1 "k8s.io/api/core/v1" + cache "k8s.io/client-go/tools/cache" + "k8s.io/klog/v2" +) + +// egressIPClusterControllerEventHandler object handles the events +// from retry framework for the egressIPClusterController. +type egressIPClusterControllerEventHandler struct { + objretry.EventHandler + objType reflect.Type + eIPC *egressIPClusterController + syncFunc func([]interface{}) error +} + +// egressIPClusterControllerEventHandler functions + +// AddResource adds the specified object to the cluster according to its type and +// returns the error, if any, yielded during object creation. +func (h *egressIPClusterControllerEventHandler) AddResource(obj interface{}, fromRetryLoop bool) error { + switch h.objType { + case factory.EgressNodeType: + node := obj.(*v1.Node) + // Initialize the allocator on every update, + // ovnkube-node/cloud-network-config-controller will make sure to + // annotate the node with the egressIPConfig, but that might have + // happened after we processed the ADD for that object, hence keep + // retrying for all UPDATEs. 
+ if err := h.eIPC.initEgressIPAllocator(node); err != nil { + klog.Warningf("Egress node initialization error: %v", err) + } + nodeEgressLabel := util.GetNodeEgressLabel() + nodeLabels := node.GetLabels() + _, hasEgressLabel := nodeLabels[nodeEgressLabel] + if hasEgressLabel { + h.eIPC.setNodeEgressAssignable(node.Name, true) + } + isReady := h.eIPC.isEgressNodeReady(node) + if isReady { + h.eIPC.setNodeEgressReady(node.Name, true) + } + isReachable := h.eIPC.isEgressNodeReachable(node) + if hasEgressLabel && isReachable && isReady { + h.eIPC.setNodeEgressReachable(node.Name, true) + if err := h.eIPC.addEgressNode(node.Name); err != nil { + return err + } + } + case factory.EgressIPType: + eIP := obj.(*egressipv1.EgressIP) + return h.eIPC.reconcileEgressIP(nil, eIP) + case factory.CloudPrivateIPConfigType: + cloudPrivateIPConfig := obj.(*ocpcloudnetworkapi.CloudPrivateIPConfig) + return h.eIPC.reconcileCloudPrivateIPConfig(nil, cloudPrivateIPConfig) + default: + return fmt.Errorf("no add function for object type %s", h.objType) + } + return nil +} + +// UpdateResource updates the specified object in the cluster to its version in newObj according +// to its type and returns the error, if any, yielded during the object update. +// The inRetryCache boolean argument is to indicate if the given resource is in the retryCache or not. +func (h *egressIPClusterControllerEventHandler) UpdateResource(oldObj, newObj interface{}, inRetryCache bool) error { + switch h.objType { + case factory.EgressIPType: + oldEIP := oldObj.(*egressipv1.EgressIP) + newEIP := newObj.(*egressipv1.EgressIP) + return h.eIPC.reconcileEgressIP(oldEIP, newEIP) + case factory.EgressNodeType: + oldNode := oldObj.(*v1.Node) + newNode := newObj.(*v1.Node) + // Initialize the allocator on every update, + // ovnkube-node/cloud-network-config-controller will make sure to + // annotate the node with the egressIPConfig, but that might have + // happened after we processed the ADD for that object, hence keep + // retrying for all UPDATEs. + if err := h.eIPC.initEgressIPAllocator(newNode); err != nil { + klog.Warningf("Egress node initialization error: %v", err) + } + nodeEgressLabel := util.GetNodeEgressLabel() + oldLabels := oldNode.GetLabels() + newLabels := newNode.GetLabels() + _, oldHadEgressLabel := oldLabels[nodeEgressLabel] + _, newHasEgressLabel := newLabels[nodeEgressLabel] + // If the node is not labeled for egress assignment, just return + // directly, we don't really need to set the ready / reachable + // status on this node if the user doesn't care about using it. 
+		if !oldHadEgressLabel && !newHasEgressLabel {
+			return nil
+		}
+		h.eIPC.setNodeEgressAssignable(newNode.Name, newHasEgressLabel)
+		if oldHadEgressLabel && !newHasEgressLabel {
+			klog.Infof("Node: %s has been un-labeled, deleting it from egress assignment", newNode.Name)
+			return h.eIPC.deleteEgressNode(oldNode.Name)
+		}
+		isOldReady := h.eIPC.isEgressNodeReady(oldNode)
+		isNewReady := h.eIPC.isEgressNodeReady(newNode)
+		isNewReachable := h.eIPC.isEgressNodeReachable(newNode)
+		h.eIPC.setNodeEgressReady(newNode.Name, isNewReady)
+		if !oldHadEgressLabel && newHasEgressLabel {
+			klog.Infof("Node: %s has been labeled, adding it for egress assignment", newNode.Name)
+			if isNewReady && isNewReachable {
+				h.eIPC.setNodeEgressReachable(newNode.Name, isNewReachable)
+				if err := h.eIPC.addEgressNode(newNode.Name); err != nil {
+					return err
+				}
+			} else {
+				klog.Warningf("Node: %s has been labeled, but node is not ready"+
+					" or not reachable, cannot use it for egress assignment", newNode.Name)
+			}
+			return nil
+		}
+		if isOldReady == isNewReady {
+			return nil
+		}
+		if !isNewReady {
+			klog.Warningf("Node: %s is not ready, deleting it from egress assignment", newNode.Name)
+			if err := h.eIPC.deleteEgressNode(newNode.Name); err != nil {
+				return err
+			}
+		} else if isNewReady && isNewReachable {
+			klog.Infof("Node: %s is ready and reachable, adding it for egress assignment", newNode.Name)
+			h.eIPC.setNodeEgressReachable(newNode.Name, isNewReachable)
+			if err := h.eIPC.addEgressNode(newNode.Name); err != nil {
+				return err
+			}
+		}
+		return nil
+	case factory.CloudPrivateIPConfigType:
+		oldCloudPrivateIPConfig := oldObj.(*ocpcloudnetworkapi.CloudPrivateIPConfig)
+		newCloudPrivateIPConfig := newObj.(*ocpcloudnetworkapi.CloudPrivateIPConfig)
+		return h.eIPC.reconcileCloudPrivateIPConfig(oldCloudPrivateIPConfig, newCloudPrivateIPConfig)
+	default:
+		return fmt.Errorf("no update function for object type %s", h.objType)
+	}
+}
+
+// DeleteResource deletes the object from the cluster according to the delete logic of its resource type.
+// cachedObj is the internal cache entry for this object, used for now for pods and network policies.
+func (h *egressIPClusterControllerEventHandler) DeleteResource(obj, cachedObj interface{}) error {
+	switch h.objType {
+	case factory.EgressIPType:
+		eIP := obj.(*egressipv1.EgressIP)
+		return h.eIPC.reconcileEgressIP(eIP, nil)
+	case factory.EgressNodeType:
+		node := obj.(*v1.Node)
+		h.eIPC.deleteNodeForEgress(node)
+		nodeEgressLabel := util.GetNodeEgressLabel()
+		nodeLabels := node.GetLabels()
+		_, hasEgressLabel := nodeLabels[nodeEgressLabel]
+		if hasEgressLabel {
+			if err := h.eIPC.deleteEgressNode(node.Name); err != nil {
+				return err
+			}
+		}
+		return nil
+	case factory.CloudPrivateIPConfigType:
+		cloudPrivateIPConfig := obj.(*ocpcloudnetworkapi.CloudPrivateIPConfig)
+		return h.eIPC.reconcileCloudPrivateIPConfig(cloudPrivateIPConfig, nil)
+	default:
+		return fmt.Errorf("no delete function for object type %s", h.objType)
+	}
+}
+
+func (h *egressIPClusterControllerEventHandler) SyncFunc(objs []interface{}) error {
+	var syncFunc func([]interface{}) error
+
+	if h.syncFunc != nil {
+		// syncFunc was provided explicitly
+		syncFunc = h.syncFunc
+	} else {
+		switch h.objType {
+		case factory.EgressNodeType:
+			syncFunc = h.eIPC.initEgressNodeReachability
+		case factory.EgressIPType,
+			factory.CloudPrivateIPConfigType:
+			syncFunc = nil
+
+		default:
+			return fmt.Errorf("no sync function for object type %s", h.objType)
+		}
+	}
+	if syncFunc == nil {
+		return nil
+	}
+	return syncFunc(objs)
+}
+
+// RecordAddEvent records the add event on this object. Not used here.
+func (h *egressIPClusterControllerEventHandler) RecordAddEvent(obj interface{}) {
+}
+
+// RecordUpdateEvent records the update event on this object. Not used here.
+func (h *egressIPClusterControllerEventHandler) RecordUpdateEvent(obj interface{}) {
+}
+
+// RecordDeleteEvent records the delete event on this object. Not used here.
+func (h *egressIPClusterControllerEventHandler) RecordDeleteEvent(obj interface{}) {
+}
+
+func (h *egressIPClusterControllerEventHandler) RecordSuccessEvent(obj interface{}) {
+}
+
+// RecordErrorEvent records an error event on this object. Not used here.
+func (h *egressIPClusterControllerEventHandler) RecordErrorEvent(obj interface{}, reason string, err error) {
+}
+
+// IsResourceScheduled returns true if the object has been scheduled. Always returns true.
+func (h *egressIPClusterControllerEventHandler) IsResourceScheduled(obj interface{}) bool {
+	return true
+}
+
+// IsObjectInTerminalState returns true if the object is in a terminal state. Always returns false.
+func (h *egressIPClusterControllerEventHandler) IsObjectInTerminalState(obj interface{}) bool { + return false +} + +func (h *egressIPClusterControllerEventHandler) AreResourcesEqual(obj1, obj2 interface{}) (bool, error) { + return false, nil +} + +// GetInternalCacheEntry returns the internal cache entry for this object +func (h *egressIPClusterControllerEventHandler) GetInternalCacheEntry(obj interface{}) interface{} { + return nil +} + +// getResourceFromInformerCache returns the latest state of the object from the informers cache +// given an object key and its type +func (h *egressIPClusterControllerEventHandler) GetResourceFromInformerCache(key string) (interface{}, error) { + var obj interface{} + var name string + var err error + + _, name, err = cache.SplitMetaNamespaceKey(key) + if err != nil { + return nil, fmt.Errorf("failed to split key %s: %v", key, err) + } + + switch h.objType { + case factory.EgressNodeType: + obj, err = h.eIPC.watchFactory.GetNode(name) + case factory.CloudPrivateIPConfigType: + obj, err = h.eIPC.watchFactory.GetCloudPrivateIPConfig(name) + case factory.EgressIPType: + obj, err = h.eIPC.watchFactory.GetEgressIP(name) + + default: + err = fmt.Errorf("object type %s not supported, cannot retrieve it from informers cache", + h.objType) + } + return obj, err +} diff --git a/go-controller/pkg/clustermanager/fake_cluster_manager_test.go b/go-controller/pkg/clustermanager/fake_cluster_manager_test.go new file mode 100644 index 0000000000..805f9d181e --- /dev/null +++ b/go-controller/pkg/clustermanager/fake_cluster_manager_test.go @@ -0,0 +1,65 @@ +package clustermanager + +import ( + "sync" + + "github.com/onsi/gomega" + egressip "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1" + egressipfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1/apis/clientset/versioned/fake" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes/fake" + "k8s.io/client-go/tools/record" +) + +type FakeClusterManager struct { + fakeClient *util.OVNClusterManagerClientset + watcher *factory.WatchFactory + eIPC *egressIPClusterController + stopChan chan struct{} + wg *sync.WaitGroup + fakeRecorder *record.FakeRecorder +} + +func NewFakeClusterManagerOVN() *FakeClusterManager { + return &FakeClusterManager{ + fakeRecorder: record.NewFakeRecorder(10), + } +} + +func (o *FakeClusterManager) start(objects ...runtime.Object) { + egressIPObjects := []runtime.Object{} + v1Objects := []runtime.Object{} + for _, object := range objects { + if _, isEgressIPObject := object.(*egressip.EgressIPList); isEgressIPObject { + egressIPObjects = append(egressIPObjects, object) + } else { + v1Objects = append(v1Objects, object) + } + } + o.fakeClient = &util.OVNClusterManagerClientset{ + KubeClient: fake.NewSimpleClientset(v1Objects...), + EgressIPClient: egressipfake.NewSimpleClientset(egressIPObjects...), + } + o.init() +} + +func (o *FakeClusterManager) init() { + var err error + o.watcher, err = factory.NewClusterManagerWatchFactory(o.fakeClient) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = o.watcher.Start() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + o.stopChan = make(chan struct{}) + o.wg = &sync.WaitGroup{} + o.eIPC = newEgressIPController(o.fakeClient, o.watcher, o.fakeRecorder) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) +} + +func (o *FakeClusterManager) shutdown() { + o.watcher.Shutdown() + 
close(o.stopChan) + o.wg.Wait() +} diff --git a/go-controller/pkg/factory/factory.go b/go-controller/pkg/factory/factory.go index 36264388c6..04472d3c2f 100644 --- a/go-controller/pkg/factory/factory.go +++ b/go-controller/pkg/factory/factory.go @@ -166,7 +166,6 @@ func NewMasterWatchFactory(ovnClientset *util.OVNMasterClientset) (*WatchFactory iFactory: informerfactory.NewSharedInformerFactory(ovnClientset.KubeClient, resyncInterval), eipFactory: egressipinformerfactory.NewSharedInformerFactory(ovnClientset.EgressIPClient, resyncInterval), efFactory: egressfirewallinformerfactory.NewSharedInformerFactory(ovnClientset.EgressFirewallClient, resyncInterval), - cpipcFactory: ocpcloudnetworkinformerfactory.NewSharedInformerFactory(ovnClientset.CloudNetworkClient, resyncInterval), egressQoSFactory: egressqosinformerfactory.NewSharedInformerFactory(ovnClientset.EgressQoSClient, resyncInterval), mnpFactory: mnpinformerfactory.NewSharedInformerFactory(ovnClientset.MultiNetworkPolicyClient, resyncInterval), egressServiceFactory: egressserviceinformerfactory.NewSharedInformerFactory(ovnClientset.EgressServiceClient, resyncInterval), @@ -256,12 +255,6 @@ func NewMasterWatchFactory(ovnClientset *util.OVNMasterClientset) (*WatchFactory return nil, err } } - if util.PlatformTypeIsEgressIPCloudProvider() { - wf.informers[CloudPrivateIPConfigType], err = newInformer(CloudPrivateIPConfigType, wf.cpipcFactory.Cloud().V1().CloudPrivateIPConfigs().Informer()) - if err != nil { - return nil, err - } - } if config.OVNKubernetesFeature.EnableEgressQoS { wf.informers[EgressQoSType], err = newInformer(EgressQoSType, wf.egressQoSFactory.K8s().V1().EgressQoSes().Informer()) if err != nil { diff --git a/go-controller/pkg/factory/factory_test.go b/go-controller/pkg/factory/factory_test.go index 43c91bb58a..5810119e46 100644 --- a/go-controller/pkg/factory/factory_test.go +++ b/go-controller/pkg/factory/factory_test.go @@ -253,6 +253,7 @@ func (c *handlerCalls) getDeleted() int { var _ = Describe("Watch Factory Operations", func() { var ( ovnClientset *util.OVNMasterClientset + ovnCMClientset *util.OVNClusterManagerClientset fakeClient *fake.Clientset egressIPFakeClient *egressipfake.Clientset egressFirewallFakeClient *egressfirewallfake.Clientset @@ -307,10 +308,14 @@ var _ = Describe("Watch Factory Operations", func() { KubeClient: fakeClient, EgressIPClient: egressIPFakeClient, EgressFirewallClient: egressFirewallFakeClient, - CloudNetworkClient: cloudNetworkFakeClient, EgressQoSClient: egressQoSFakeClient, EgressServiceClient: egressServiceFakeClient, } + ovnCMClientset = &util.OVNClusterManagerClientset{ + KubeClient: fakeClient, + EgressIPClient: egressIPFakeClient, + CloudNetworkClient: cloudNetworkFakeClient, + } pods = make([]*v1.Pod, 0) podWatch = objSetup(fakeClient, "pods", func(core.Action) (bool, runtime.Object, error) { @@ -420,6 +425,8 @@ var _ = Describe("Watch Factory Operations", func() { testExisting := func(objType reflect.Type, namespace string, sel labels.Selector, priority int) { if objType == EndpointSliceType { wf, err = NewNodeWatchFactory(ovnClientset.GetNodeClientset(), nodeName) + } else if objType == CloudPrivateIPConfigType { + wf, err = NewClusterManagerWatchFactory(ovnCMClientset) } else { wf, err = NewMasterWatchFactory(ovnClientset) } @@ -442,6 +449,8 @@ var _ = Describe("Watch Factory Operations", func() { testExistingFilteredHandler := func(objType reflect.Type, realObj reflect.Type, namespace string, sel labels.Selector, priority int) { if objType == EndpointSliceType { wf, err = 
NewNodeWatchFactory(ovnClientset.GetNodeClientset(), nodeName) + } else if objType == CloudPrivateIPConfigType { + wf, err = NewClusterManagerWatchFactory(ovnCMClientset) } else { wf, err = NewMasterWatchFactory(ovnClientset) } @@ -572,6 +581,8 @@ var _ = Describe("Watch Factory Operations", func() { testExisting := func(objType reflect.Type) { if objType == EndpointSliceType { wf, err = NewNodeWatchFactory(ovnClientset.GetNodeClientset(), nodeName) + } else if objType == CloudPrivateIPConfigType { + wf, err = NewClusterManagerWatchFactory(ovnCMClientset) } else { wf, err = NewMasterWatchFactory(ovnClientset) } @@ -1648,7 +1659,7 @@ var _ = Describe("Watch Factory Operations", func() { wf.RemoveEgressIPHandler(h) }) It("responds to cloudPrivateIPConfig add/update/delete events", func() { - wf, err = NewMasterWatchFactory(ovnClientset) + wf, err = NewClusterManagerWatchFactory(ovnCMClientset) Expect(err).NotTo(HaveOccurred()) err = wf.Start() Expect(err).NotTo(HaveOccurred()) diff --git a/go-controller/pkg/ovn/base_event_handler.go b/go-controller/pkg/ovn/base_event_handler.go index 2d25b31157..874d773d08 100644 --- a/go-controller/pkg/ovn/base_event_handler.go +++ b/go-controller/pkg/ovn/base_event_handler.go @@ -29,7 +29,6 @@ func hasResourceAnUpdateFunc(objType reflect.Type) bool { factory.EgressIPPodType, factory.EgressNodeType, factory.EgressFwNodeType, - factory.CloudPrivateIPConfigType, factory.LocalPodSelectorType, factory.NamespaceType, factory.MultiNetworkPolicyType: @@ -92,8 +91,7 @@ func (h *baseNetworkControllerEventHandler) areResourcesEqual(objType reflect.Ty case factory.EgressIPType, factory.EgressIPNamespaceType, - factory.EgressNodeType, - factory.CloudPrivateIPConfigType: + factory.EgressNodeType: // force update path for EgressIP resource. return false, nil @@ -167,9 +165,6 @@ func (h *baseNetworkControllerEventHandler) getResourceFromInformerCache(objType case factory.EgressIPType: obj, err = watchFactory.GetEgressIP(name) - case factory.CloudPrivateIPConfigType: - obj, err = watchFactory.GetCloudPrivateIPConfig(name) - case factory.MultiNetworkPolicyType: obj, err = watchFactory.GetMultiNetworkPolicy(namespace, name) @@ -199,7 +194,6 @@ func needsUpdateDuringRetry(objType reflect.Type) bool { factory.EgressIPType, factory.EgressIPPodType, factory.EgressIPNamespaceType, - factory.CloudPrivateIPConfigType, factory.MultiNetworkPolicyType: return true } diff --git a/go-controller/pkg/ovn/controller/egress_services/egress_services_node.go b/go-controller/pkg/ovn/controller/egress_services/egress_services_node.go index 439a770b59..eb161051ee 100644 --- a/go-controller/pkg/ovn/controller/egress_services/egress_services_node.go +++ b/go-controller/pkg/ovn/controller/egress_services/egress_services_node.go @@ -5,8 +5,10 @@ import ( "encoding/json" "fmt" "net" + "os" "sort" "sync" + "syscall" "time" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/healthcheck" @@ -80,6 +82,89 @@ func (c *Controller) CheckNodesReachabilityIterate() { } } +type egressSVCDialer interface { + dial(ip net.IP, timeout time.Duration) bool +} + +var dialer egressSVCDialer = &egressSVCDial{} + +type egressSVCDial struct{} + +// Blantant copy from: https://github.com/openshift/sdn/blob/master/pkg/network/common/egressip.go#L499-L505 +// Ping a node and return whether or not we think it is online. 
We do this by trying to
+// open a TCP connection to the "discard" service (port 9); if the node is offline, the
+// attempt will either time out with no response, or else return "no route to host" (and
+// we will return false). If the node is online then we presumably will get a "connection
+// refused" error; but the code below assumes that anything other than timeout or "no
+// route" indicates that the node is online.
+func (e *egressSVCDial) dial(ip net.IP, timeout time.Duration) bool {
+	conn, err := net.DialTimeout("tcp", net.JoinHostPort(ip.String(), "9"), timeout)
+	if conn != nil {
+		conn.Close()
+	}
+	if opErr, ok := err.(*net.OpError); ok {
+		if opErr.Timeout() {
+			return false
+		}
+		if sysErr, ok := opErr.Err.(*os.SyscallError); ok && sysErr.Err == syscall.EHOSTUNREACH {
+			return false
+		}
+	}
+	return true
+}
+
+func IsReachableViaGRPC(mgmtIPs []net.IP, healthClient healthcheck.EgressIPHealthClient, healthCheckPort, totalTimeout int) bool {
+	dialCtx, dialCancel := context.WithTimeout(context.Background(), time.Duration(totalTimeout)*time.Second)
+	defer dialCancel()
+
+	if !healthClient.IsConnected() {
+		// gRPC session is not up. Attempt to connect and if that succeeds, we will declare the node as reachable.
+		return healthClient.Connect(dialCtx, mgmtIPs, healthCheckPort)
+	}
+
+	// gRPC session is already established. Send a probe, which will succeed, or close the session.
+	return healthClient.Probe(dialCtx)
+}
+
+func IsReachableLegacy(node string, mgmtIPs []net.IP, totalTimeout int) bool {
+	var retryTimeOut, initialRetryTimeOut time.Duration
+
+	numMgmtIPs := len(mgmtIPs)
+	if numMgmtIPs == 0 {
+		return false
+	}
+
+	switch totalTimeout {
+	// Check if we need to do node reachability check
+	case 0:
+		return true
+	case 1:
+		// Using time duration for initial retry with 700/numIPs msec and retry of 100/numIPs msec
+		// to ensure total wait time will be in range with the configured value including a sleep of 100msec between attempts.
+		initialRetryTimeOut = time.Duration(700/numMgmtIPs) * time.Millisecond
+		retryTimeOut = time.Duration(100/numMgmtIPs) * time.Millisecond
+	default:
+		// Using time duration for initial retry with 900/numIPs msec
+		// to ensure total wait time will be in range with the configured value including a sleep of 100msec between attempts.
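+		// For illustration: with two management IPs, totalTimeout=1 gives an initial
+		// dial timeout of 700/2=350ms and per-retry timeouts of 100/2=50ms, while any
+		// larger totalTimeout gives 900/2=450ms for both, so each pass over the IPs
+		// plus the 100ms sleep between attempts stays within the configured budget.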
+ initialRetryTimeOut = time.Duration(900/numMgmtIPs) * time.Millisecond + retryTimeOut = initialRetryTimeOut + } + + timeout := initialRetryTimeOut + endTime := time.Now().Add(time.Second * time.Duration(totalTimeout)) + for time.Now().Before(endTime) { + for _, ip := range mgmtIPs { + if dialer.dial(ip, timeout) { + return true + } + } + time.Sleep(100 * time.Millisecond) + timeout = retryTimeOut + } + klog.Errorf("Failed reachability check for %s", node) + return false +} + func (c *Controller) onNodeAdd(obj interface{}) { key, err := cache.MetaNamespaceKeyFunc(obj) if err != nil { diff --git a/go-controller/pkg/ovn/default_network_controller.go b/go-controller/pkg/ovn/default_network_controller.go index a77dc7f008..6018f5c75f 100644 --- a/go-controller/pkg/ovn/default_network_controller.go +++ b/go-controller/pkg/ovn/default_network_controller.go @@ -8,7 +8,6 @@ import ( "sync" "time" - ocpcloudnetworkapi "github.com/openshift/api/cloudnetwork/v1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" egressfirewall "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1" egressipv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1" @@ -88,7 +87,7 @@ type DefaultNetworkController struct { defaultCOPPUUID string // Controller used for programming OVN for egress IP - eIPC egressIPController + eIPC egressIPZoneController // Controller used to handle services svcController *svccontroller.Controller @@ -121,9 +120,6 @@ type DefaultNetworkController struct { hybridOverlayFailed sync.Map syncZoneICFailed sync.Map - // retry framework for Cloud private IP config - retryCloudPrivateIPConfig *retry.RetryFramework - // variable to determine if all pods present on the node during startup have been processed // updated atomically allInitialPodsProcessed uint32 @@ -187,19 +183,13 @@ func newDefaultNetworkControllerCommon(cnci *CommonNetworkControllerInfo, }, externalGWCache: make(map[ktypes.NamespacedName]*externalRouteInfo), exGWCacheMutex: sync.RWMutex{}, - eIPC: egressIPController{ - egressIPAssignmentMutex: &sync.Mutex{}, - podAssignmentMutex: &sync.Mutex{}, - nodeIPUpdateMutex: &sync.Mutex{}, - podAssignment: make(map[string]*podAssignmentState), - pendingCloudPrivateIPConfigsMutex: &sync.Mutex{}, - pendingCloudPrivateIPConfigsOps: make(map[string]map[string]*cloudPrivateIPConfigOp), - allocator: allocator{&sync.Mutex{}, make(map[string]*egressNode)}, - nbClient: cnci.nbClient, - watchFactory: cnci.watchFactory, - egressIPTotalTimeout: config.OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout, - reachabilityCheckInterval: egressIPReachabilityCheckInterval, - egressIPNodeHealthCheckPort: config.OVNKubernetesFeature.EgressIPNodeHealthCheckPort, + eIPC: egressIPZoneController{ + nodeIPUpdateMutex: &sync.Mutex{}, + podAssignmentMutex: &sync.Mutex{}, + podAssignment: make(map[string]*podAssignmentState), + nbClient: cnci.nbClient, + watchFactory: cnci.watchFactory, + nodeZoneState: syncmap.NewSyncMap[bool](), }, loadbalancerClusterCache: make(map[kapi.Protocol]string), clusterLoadBalancerGroupUUID: "", @@ -235,7 +225,6 @@ func (oc *DefaultNetworkController) initRetryFramework() { oc.retryEgressIPPods = oc.newRetryFramework(factory.EgressIPPodType) oc.retryEgressNodes = oc.newRetryFramework(factory.EgressNodeType) oc.retryEgressFwNodes = oc.newRetryFramework(factory.EgressFwNodeType) - oc.retryCloudPrivateIPConfig = oc.newRetryFramework(factory.CloudPrivateIPConfigType) oc.retryNamespaces = oc.newRetryFramework(factory.NamespaceType) 
 	oc.retryNetworkPolicies = oc.newRetryFramework(factory.PolicyType)
 }
@@ -455,11 +444,6 @@ func (oc *DefaultNetworkController) Run(ctx context.Context) error {
 	if err := WithSyncDurationMetric("egress ip", oc.WatchEgressIP); err != nil {
 		return err
 	}
-	if util.PlatformTypeIsEgressIPCloudProvider() {
-		if err := WithSyncDurationMetric("could private ip config", oc.WatchCloudPrivateIPConfig); err != nil {
-			return err
-		}
-	}
 	if config.OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout == 0 {
 		klog.V(2).Infof("EgressIP node reachability check disabled")
 	} else if config.OVNKubernetesFeature.EgressIPNodeHealthCheckPort != 0 {
@@ -772,7 +756,23 @@ func (h *defaultNetworkControllerEventHandler) AddResource(obj interface{}, from
 	case factory.EgressNodeType:
 		node := obj.(*kapi.Node)
-		return h.oc.reconcileNodeForEgressIP(nil, node)
+		// Update node in zone cache; value will be true if node is local
+		// to this zone and false if it is not
+		h.oc.eIPC.nodeZoneState.LockKey(node.Name)
+		h.oc.eIPC.nodeZoneState.Store(node.Name, h.oc.isLocalZoneNode(node))
+		h.oc.eIPC.nodeZoneState.UnlockKey(node.Name)
+		// add the nodeIP to the default LRP (102 priority) destination address-set
+		err := h.oc.ensureDefaultNoRerouteNodePolicies()
+		if err != nil {
+			return err
+		}
+		// add the GARP configuration for all the new nodes we get
+		// since we use the "exclude-lb-vips-from-garp": "true"
+		// we shouldn't have scale issues
+		// NOTE: Adding GARP needs to be done only during node add
+		// It is a one time operation and doesn't need to be done during
+		// node updates. It needs to be done only for nodes local to this zone
+		return h.oc.addEgressNode(node)
 	case factory.EgressFwNodeType:
 		node := obj.(*kapi.Node)
@@ -782,10 +782,6 @@ func (h *defaultNetworkControllerEventHandler) AddResource(obj interface{}, from
 			return err
 		}
-	case factory.CloudPrivateIPConfigType:
-		cloudPrivateIPConfig := obj.(*ocpcloudnetworkapi.CloudPrivateIPConfig)
-		return h.oc.reconcileCloudPrivateIPConfig(nil, cloudPrivateIPConfig)
-
 	case factory.NamespaceType:
 		ns, ok := obj.(*kapi.Namespace)
 		if !ok {
@@ -896,18 +892,26 @@ func (h *defaultNetworkControllerEventHandler) UpdateResource(oldObj, newObj int
 	case factory.EgressNodeType:
 		oldNode := oldObj.(*kapi.Node)
 		newNode := newObj.(*kapi.Node)
-		return h.oc.reconcileNodeForEgressIP(oldNode, newNode)
+		// Update node in zone cache; value will be true if node is local
+		// to this zone and false if it is not
+		h.oc.eIPC.nodeZoneState.LockKey(newNode.Name)
+		h.oc.eIPC.nodeZoneState.Store(newNode.Name, h.oc.isLocalZoneNode(newNode))
+		h.oc.eIPC.nodeZoneState.UnlockKey(newNode.Name)
+		// update the nodeIP in the default-reRoute (102 priority) destination address-set
+		if util.NodeHostAddressesAnnotationChanged(oldNode, newNode) {
+			klog.Infof("Egress IP detected IP address change for node %s.
Updating no re-route policies", newNode.Name) + err := h.oc.ensureDefaultNoRerouteNodePolicies() + if err != nil { + return err + } + } + return nil case factory.EgressFwNodeType: oldNode := oldObj.(*kapi.Node) newNode := newObj.(*kapi.Node) return h.oc.updateEgressFirewallForNode(oldNode, newNode) - case factory.CloudPrivateIPConfigType: - oldCloudPrivateIPConfig := oldObj.(*ocpcloudnetworkapi.CloudPrivateIPConfig) - newCloudPrivateIPConfig := newObj.(*ocpcloudnetworkapi.CloudPrivateIPConfig) - return h.oc.reconcileCloudPrivateIPConfig(oldCloudPrivateIPConfig, newCloudPrivateIPConfig) - case factory.NamespaceType: oldNs, newNs := oldObj.(*kapi.Namespace), newObj.(*kapi.Namespace) return h.oc.updateNamespace(oldNs, newNs) @@ -966,7 +970,20 @@ func (h *defaultNetworkControllerEventHandler) DeleteResource(obj, cachedObj int case factory.EgressNodeType: node := obj.(*kapi.Node) - return h.oc.reconcileNodeForEgressIP(node, nil) + // remove the GARP setup for the node + if err := h.oc.deleteEgressNode(node); err != nil { + return err + } + // remove the IPs from the destination address-set of the default LRP (102) + err := h.oc.ensureDefaultNoRerouteNodePolicies() + if err != nil { + return err + } + // Update node in zone cache; remove the node key since node has been deleted. + h.oc.eIPC.nodeZoneState.LockKey(node.Name) + h.oc.eIPC.nodeZoneState.Delete(node.Name) + h.oc.eIPC.nodeZoneState.UnlockKey(node.Name) + return nil case factory.EgressFwNodeType: node, ok := obj.(*kapi.Node) @@ -975,10 +992,6 @@ func (h *defaultNetworkControllerEventHandler) DeleteResource(obj, cachedObj int } return h.oc.updateEgressFirewallForNode(node, nil) - case factory.CloudPrivateIPConfigType: - cloudPrivateIPConfig := obj.(*ocpcloudnetworkapi.CloudPrivateIPConfig) - return h.oc.reconcileCloudPrivateIPConfig(cloudPrivateIPConfig, nil) - case factory.NamespaceType: ns := obj.(*kapi.Namespace) return h.oc.deleteNamespace(ns) @@ -1018,8 +1031,7 @@ func (h *defaultNetworkControllerEventHandler) SyncFunc(objs []interface{}) erro syncFunc = nil case factory.EgressIPPodType, - factory.EgressIPType, - factory.CloudPrivateIPConfigType: + factory.EgressIPType: syncFunc = nil case factory.NamespaceType: diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index ec43526802..72f62577fe 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -1,21 +1,15 @@ package ovn import ( - "context" - "encoding/hex" "encoding/json" "errors" "fmt" "net" - "os" "reflect" - "sort" "strings" "sync" - "syscall" "time" - ocpcloudnetworkapi "github.com/openshift/api/cloudnetwork/v1" libovsdbclient "github.com/ovn-org/libovsdb/client" "github.com/ovn-org/libovsdb/ovsdb" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" @@ -26,7 +20,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" egresssvc "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/egress_services" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/healthcheck" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/syncmap" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -57,256 +51,103 @@ func getEgressIPAddrSetDbIDs(name egressIpAddrSetName, controller string) *libov }) } -type egressIPDialer interface { - dial(ip net.IP, timeout time.Duration) bool -} - -var dialer egressIPDialer = &egressIPDial{} - -type 
healthcheckClientAllocator interface { - allocate(nodeName string) healthcheck.EgressIPHealthClient -} - -var hccAllocator healthcheckClientAllocator = &egressIPHealthcheckClientAllocator{} - +// main reconcile functions begin here + +// reconcileEgressIP reconciles the database configuration +// setup in nbdb based on the received egressIP objects +// CASE 1: if old == nil && new != nil {add event, we do a full setup for all statuses} +// CASE 2: if old != nil && new == nil {delete event, we do a full teardown for all statuses} +// CASE 3: if old != nil && new != nil {update event, +// CASE 3.1: we calculate based on difference between old and new statuses +// which ones need teardown and which ones need setup +// this ensures there is no disruption for things that did not change +// CASE 3.2: Only Namespace selectors on Spec changed +// CASE 3.3: Only Pod Selectors on Spec changed +// CASE 3.4: Both Namespace && Pod Selectors on Spec changed +// } +// NOTE: `Spec.EgressIPs`` updates for EIP object are not processed here, that is the job of cluster manager +// We only care about `Spec.NamespaceSelector`, `Spec.PodSelector` and `Status` field func (oc *DefaultNetworkController) reconcileEgressIP(old, new *egressipv1.EgressIP) (err error) { - // Lock the assignment, this is needed because this function can end up - // being called from WatchEgressNodes and WatchEgressIP, i.e: two different - // go-routines and we need to make sure the assignment is safe. - oc.eIPC.egressIPAssignmentMutex.Lock() - defer oc.eIPC.egressIPAssignmentMutex.Unlock() - - // Initialize an empty name which is filled depending on the operation - // (ADD/UPDATE/DELETE) we are performing. This is done as to be able to - // delete the NB DB set up correctly when searching the DB based on the - // name. - name := "" - - // Initialize a status which will be used to compare against - // new.spec.egressIPs and decide on what from the status should get deleted - // or kept. - status := []egressipv1.EgressIPStatusItem{} - - // Initialize two empty objects as to avoid SIGSEGV. The code should play - // nicely with empty objects though. - oldEIP, newEIP := &egressipv1.EgressIP{}, &egressipv1.EgressIP{} - - // Initialize two "nothing" selectors. Nothing selector are semantically - // opposed to "empty" selectors, i.e: they select and match nothing, while - // an empty one matches everything. If old/new are nil, and we don't do - // this: we would have an empty EgressIP object which would result in two - // empty selectors, matching everything, whereas we would mean the inverse - newNamespaceSelector, _ := metav1.LabelSelectorAsSelector(nil) - oldNamespaceSelector, _ := metav1.LabelSelectorAsSelector(nil) - // Initialize a sets.String which holds egress IPs that were not fully assigned - // but are allocated and they are meant to be removed. - staleEgressIPs := sets.NewString() - if old != nil { - oldEIP = old - oldNamespaceSelector, err = metav1.LabelSelectorAsSelector(&oldEIP.Spec.NamespaceSelector) - if err != nil { - return fmt.Errorf("invalid old namespaceSelector, err: %v", err) - } - name = oldEIP.Name - status = oldEIP.Status.Items - staleEgressIPs.Insert(oldEIP.Spec.EgressIPs...) 
- } - if new != nil { - newEIP = new - newNamespaceSelector, err = metav1.LabelSelectorAsSelector(&newEIP.Spec.NamespaceSelector) - if err != nil { - return fmt.Errorf("invalid new namespaceSelector, err: %v", err) - } - name = newEIP.Name - status = newEIP.Status.Items - if staleEgressIPs.Len() > 0 { - for _, egressIP := range newEIP.Spec.EgressIPs { - if staleEgressIPs.Has(egressIP) { - staleEgressIPs.Delete(egressIP) - } + // CASE 1: EIP object deletion, we need to teardown database configuration for all the statuses + if old != nil && new == nil { + removeStatus := old.Status.Items + if len(removeStatus) > 0 { + if err := oc.deleteEgressIPAssignments(old.Name, removeStatus); err != nil { + return err } } } - - // We do not initialize a nothing selector for the podSelector, because - // these are allowed to be empty (i.e: matching all pods in a namespace), as - // supposed to the namespaceSelector - newPodSelector, err := metav1.LabelSelectorAsSelector(&newEIP.Spec.PodSelector) - if err != nil { - return fmt.Errorf("invalid new podSelector, err: %v", err) - } - oldPodSelector, err := metav1.LabelSelectorAsSelector(&oldEIP.Spec.PodSelector) - if err != nil { - return fmt.Errorf("invalid old podSelector, err: %v", err) - } - - // Validate the spec and use only the valid egress IPs when performing any - // successive operations, theoretically: the user could specify invalid IP - // addresses, which would break us. - validSpecIPs, err := oc.validateEgressIPSpec(name, newEIP.Spec.EgressIPs) - if err != nil { - return fmt.Errorf("invalid EgressIP spec, err: %v", err) - } - - // Validate the status, on restart it could be the case that what might have - // been assigned when ovnkube-master last ran is not a valid assignment - // anymore (specifically if ovnkube-master has been crashing for a while). - // Any invalid status at this point in time needs to be removed and assigned - // to a valid node. - validStatus, invalidStatus := oc.validateEgressIPStatus(name, status) - for status := range validStatus { - // If the spec has changed and an egress IP has been removed by the - // user: we need to un-assign that egress IP - if !validSpecIPs.Has(status.EgressIP) { - invalidStatus[status] = "" - delete(validStatus, status) - } - } - - invalidStatusLen := len(invalidStatus) - if invalidStatusLen > 0 { - metrics.RecordEgressIPRebalance(invalidStatusLen) - } - - // Add only the diff between what is requested and valid and that which - // isn't already assigned. - ipsToAssign := validSpecIPs - ipsToRemove := sets.New[string]() - statusToAdd := make([]egressipv1.EgressIPStatusItem, 0, len(ipsToAssign)) - statusToKeep := make([]egressipv1.EgressIPStatusItem, 0, len(validStatus)) - for status := range validStatus { - statusToKeep = append(statusToKeep, status) - ipsToAssign.Delete(status.EgressIP) - } - statusToRemove := make([]egressipv1.EgressIPStatusItem, 0, invalidStatusLen) - for status := range invalidStatus { - statusToRemove = append(statusToRemove, status) - ipsToRemove.Insert(status.EgressIP) - } - if ipsToRemove.Len() > 0 { - // The following is added as to ensure that we only add after having - // successfully removed egress IPs. This case is not very important on - // bare-metal (since we execute the add after the remove below, and - // hence have full control of the execution - barring its success), but - // on a cloud: we don't execute anything below, we wait for the status - // on the CloudPrivateIPConfig(s) we create to be set before executing - // anything in the OVN DB. 
So, we need to make sure that we delete and - // then add, mainly because if EIP1 is added to nodeX and then EIP2 is - // removed from nodeX, we might remove the setup made for EIP1. The - // add/delete ordering of events is not guaranteed on the cloud where we - // depend on other controllers to execute the work for us however. By - // comparing the spec to the status and applying the following truth - // table we can ensure that order of events. - - // case ID | Egress IP to add | Egress IP to remove | ipsToAssign - // 1 | e1 | e1 | e1 - // 2 | e2 | e1 | - - // 3 | e2 | - | e2 - // 4 | - | e1 | - - - // Case 1 handles updates. Case 2 and 3 makes sure we don't add until we - // successfully delete. Case 4 just shows an example of what would - // happen if we don't have anything to add - ipsToAssign = ipsToAssign.Intersection(ipsToRemove) - } - - if !util.PlatformTypeIsEgressIPCloudProvider() { - if len(statusToRemove) > 0 { - // Delete the statusToRemove from the allocator cache. If we don't - // do this we will occupy assignment positions for the ipsToAssign, - // even though statusToRemove will be removed afterwards - oc.deleteAllocatorEgressIPAssignments(statusToRemove) - if err := oc.deleteEgressIPAssignments(name, statusToRemove); err != nil { + // CASE 2: EIP object addition, we need to setup database configuration for all the statuses + if old == nil && new != nil { + addStatus := new.Status.Items + if len(addStatus) > 0 { + if err := oc.addEgressIPAssignments(new.Name, addStatus, new.Spec.NamespaceSelector, new.Spec.PodSelector); err != nil { return err } } - if len(ipsToAssign) > 0 { - statusToAdd = oc.assignEgressIPs(name, ipsToAssign.UnsortedList()) - statusToKeep = append(statusToKeep, statusToAdd...) - } - // Assign all statusToKeep, we need to warm up the podAssignment cache - // on restart. We won't perform any additional transactions to the NB DB - // for things which exists because the libovsdb operations use - // modelClient which is idempotent. - if err := oc.addEgressIPAssignments(name, statusToKeep, newEIP.Spec.NamespaceSelector, newEIP.Spec.PodSelector); err != nil { - return err - } - // Add all assignments which are to be kept to the allocator cache, - // allowing us to track all assignments which have been performed and - // avoid incorrect future assignments due to a de-synchronized cache. - oc.addAllocatorEgressIPAssignments(name, statusToKeep) - // Update the object only on an ADD/UPDATE. If we are processing a - // DELETE, new will be nil and we should not update the object. 
- if len(statusToAdd) > 0 || (len(statusToRemove) > 0 && new != nil) { - if err := oc.patchReplaceEgressIPStatus(name, statusToKeep); err != nil { - return err + } + // CASE 3: EIP object update + if old != nil && new != nil { + oldEIP := old + newEIP := new + // CASE 3.1: we need to see which statuses + // 1) need teardown + // 2) need setup + // 3) need no-op + if !reflect.DeepEqual(oldEIP.Status.Items, newEIP.Status.Items) { + statusToRemove := make(map[string]egressipv1.EgressIPStatusItem, 0) + statusToKeep := make(map[string]egressipv1.EgressIPStatusItem, 0) + for _, status := range oldEIP.Status.Items { + statusToRemove[status.EgressIP] = status + } + for _, status := range newEIP.Status.Items { + statusToKeep[status.EgressIP] = status + } + // only delete items that were in the oldSpec but cannot be found in the newSpec + statusToDelete := make([]egressipv1.EgressIPStatusItem, 0) + for eIP, oldStatus := range statusToRemove { + if newStatus, ok := statusToKeep[eIP]; ok && newStatus.Node == oldStatus.Node { + continue + } + statusToDelete = append(statusToDelete, oldStatus) } - } - } else { - // Even when running on a public cloud, we must make sure that we unwire EgressIP - // configuration from OVN *before* we instruct the CloudNetworkConfigController - // to remove the CloudPrivateIPConfig object from the cloud. - // CloudPrivateIPConfig objects can be in the "Deleting" state for a long time, - // waiting for the underlying cloud to finish its action and to report success of the - // unattach operation. Some clouds such as Azure will remove the IP address nearly - // immediately, but then they will take a long time (seconds to minutes) to actually report - // success of the removal operation. - if len(statusToRemove) > 0 { - // Delete all assignments that are to be removed from the allocator - // cache. If we don't do this we will occupy assignment positions for - // the ipsToAdd, even though statusToRemove will be removed afterwards - oc.deleteAllocatorEgressIPAssignments(statusToRemove) - if err := oc.deleteEgressIPAssignments(name, statusToRemove); err != nil { - return err + if len(statusToDelete) > 0 { + if err := oc.deleteEgressIPAssignments(old.Name, statusToDelete); err != nil { + return err + } } - } - // When egress IP is not fully assigned to a node, then statusToRemove may not - // have those entries, hence retrieve it from staleEgressIPs for removing - // the item from cloudprivateipconfig. - for _, toRemove := range statusToRemove { - if !staleEgressIPs.Has(toRemove.EgressIP) { - continue + // only add items that were NOT in the oldSpec but can be found in the newSpec + statusToAdd := make([]egressipv1.EgressIPStatusItem, 0) + for eIP, newStatus := range statusToKeep { + if oldStatus, ok := statusToRemove[eIP]; ok && oldStatus.Node == newStatus.Node { + continue + } + statusToAdd = append(statusToAdd, newStatus) } - staleEgressIPs.Delete(toRemove.EgressIP) - } - for staleEgressIP := range staleEgressIPs { - if nodeName := oc.deleteAllocatorEgressIPAssignmentIfExists(name, staleEgressIP); nodeName != "" { - statusToRemove = append(statusToRemove, - egressipv1.EgressIPStatusItem{EgressIP: staleEgressIP, Node: nodeName}) + if len(statusToAdd) > 0 { + if err := oc.addEgressIPAssignments(new.Name, statusToAdd, new.Spec.NamespaceSelector, new.Spec.PodSelector); err != nil { + return err + } } } - // If running on a public cloud we should not program OVN just yet for assignment - // operations. 
We need confirmation from the cloud-network-config-controller that - // it can assign the IPs. reconcileCloudPrivateIPConfig will take care of - // processing the answer from the requests we make here, and update OVN - // accordingly when we know what the outcome is. - if len(ipsToAssign) > 0 { - statusToAdd = oc.assignEgressIPs(name, ipsToAssign.UnsortedList()) - statusToKeep = append(statusToKeep, statusToAdd...) - } - // Same as above: Add all assignments which are to be kept to the - // allocator cache, allowing us to track all assignments which have been - // performed and avoid incorrect future assignments due to a - // de-synchronized cache. - oc.addAllocatorEgressIPAssignments(name, statusToKeep) - - // Execute CloudPrivateIPConfig changes for assignments which need to be - // added/removed, assignments which don't change do not require any - // further setup. - if err := oc.executeCloudPrivateIPConfigChange(name, statusToAdd, statusToRemove); err != nil { - return err - } - } - - // Record the egress IP allocator count - metrics.RecordEgressIPCount(getEgressIPAllocationTotalCount(oc.eIPC.allocator)) - // If nothing has changed for what concerns the assignments, then check if - // the namespaceSelector and podSelector have changed. If they have changed - // then remove the setup for all pods which matched the old and add - // everything for all pods which match the new. - if len(ipsToAssign) == 0 && - len(statusToRemove) == 0 { + oldNamespaceSelector, err := metav1.LabelSelectorAsSelector(&oldEIP.Spec.NamespaceSelector) + if err != nil { + return fmt.Errorf("invalid old namespaceSelector, err: %v", err) + } + newNamespaceSelector, err := metav1.LabelSelectorAsSelector(&newEIP.Spec.NamespaceSelector) + if err != nil { + return fmt.Errorf("invalid new namespaceSelector, err: %v", err) + } + oldPodSelector, err := metav1.LabelSelectorAsSelector(&oldEIP.Spec.PodSelector) + if err != nil { + return fmt.Errorf("invalid old podSelector, err: %v", err) + } + newPodSelector, err := metav1.LabelSelectorAsSelector(&newEIP.Spec.PodSelector) + if err != nil { + return fmt.Errorf("invalid new podSelector, err: %v", err) + } + // CASE 3.2: Only Namespace selectors on Spec changed // Only the namespace selector changed: remove the setup for all pods // matching the old and not matching the new, and add setup for the pod // matching the new and which didn't match the old. 
@@ -318,16 +159,17 @@ func (oc *DefaultNetworkController) reconcileEgressIP(old, new *egressipv1.Egres for _, namespace := range namespaces { namespaceLabels := labels.Set(namespace.Labels) if !newNamespaceSelector.Matches(namespaceLabels) && oldNamespaceSelector.Matches(namespaceLabels) { - if err := oc.deleteNamespaceEgressIPAssignment(name, oldEIP.Status.Items, namespace, oldEIP.Spec.PodSelector); err != nil { + if err := oc.deleteNamespaceEgressIPAssignment(oldEIP.Name, oldEIP.Status.Items, namespace, oldEIP.Spec.PodSelector); err != nil { return err } } if newNamespaceSelector.Matches(namespaceLabels) && !oldNamespaceSelector.Matches(namespaceLabels) { - if err := oc.addNamespaceEgressIPAssignments(name, newEIP.Status.Items, namespace, newEIP.Spec.PodSelector); err != nil { + if err := oc.addNamespaceEgressIPAssignments(newEIP.Name, newEIP.Status.Items, namespace, newEIP.Spec.PodSelector); err != nil { return err } } } + // CASE 3.3: Only Pod Selectors on Spec changed // Only the pod selector changed: remove the setup for all pods // matching the old and not matching the new, and add setup for the pod // matching the new and which didn't match the old. @@ -344,7 +186,7 @@ func (oc *DefaultNetworkController) reconcileEgressIP(old, new *egressipv1.Egres for _, pod := range pods { podLabels := labels.Set(pod.Labels) if !newPodSelector.Matches(podLabels) && oldPodSelector.Matches(podLabels) { - if err := oc.deletePodEgressIPAssignments(name, oldEIP.Status.Items, pod); err != nil { + if err := oc.deletePodEgressIPAssignments(oldEIP.Name, oldEIP.Status.Items, pod); err != nil { return err } } @@ -352,12 +194,13 @@ func (oc *DefaultNetworkController) reconcileEgressIP(old, new *egressipv1.Egres continue } if newPodSelector.Matches(podLabels) && !oldPodSelector.Matches(podLabels) { - if err := oc.addPodEgressIPAssignmentsWithLock(name, newEIP.Status.Items, pod); err != nil { + if err := oc.addPodEgressIPAssignmentsWithLock(newEIP.Name, newEIP.Status.Items, pod); err != nil { return err } } } } + // CASE 3.4: Both Namespace && Pod Selectors on Spec changed // Both selectors changed: remove the setup for pods matching the // old ones and not matching the new ones, and add setup for all // matching the new ones but which didn't match the old ones. @@ -371,7 +214,7 @@ func (oc *DefaultNetworkController) reconcileEgressIP(old, new *egressipv1.Egres // If the namespace does not match anymore then there's no // reason to look at the pod selector. 
if !newNamespaceSelector.Matches(namespaceLabels) && oldNamespaceSelector.Matches(namespaceLabels) { - if err := oc.deleteNamespaceEgressIPAssignment(name, oldEIP.Status.Items, namespace, oldEIP.Spec.PodSelector); err != nil { + if err := oc.deleteNamespaceEgressIPAssignment(oldEIP.Name, oldEIP.Status.Items, namespace, oldEIP.Spec.PodSelector); err != nil { return err } } @@ -387,7 +230,7 @@ func (oc *DefaultNetworkController) reconcileEgressIP(old, new *egressipv1.Egres for _, pod := range pods { podLabels := labels.Set(pod.Labels) if newPodSelector.Matches(podLabels) { - if err := oc.addPodEgressIPAssignmentsWithLock(name, newEIP.Status.Items, pod); err != nil { + if err := oc.addPodEgressIPAssignmentsWithLock(newEIP.Name, newEIP.Status.Items, pod); err != nil { return err } } @@ -403,7 +246,7 @@ func (oc *DefaultNetworkController) reconcileEgressIP(old, new *egressipv1.Egres for _, pod := range pods { podLabels := labels.Set(pod.Labels) if !newPodSelector.Matches(podLabels) && oldPodSelector.Matches(podLabels) { - if err := oc.deletePodEgressIPAssignments(name, oldEIP.Status.Items, pod); err != nil { + if err := oc.deletePodEgressIPAssignments(oldEIP.Name, oldEIP.Status.Items, pod); err != nil { return err } } @@ -411,7 +254,7 @@ func (oc *DefaultNetworkController) reconcileEgressIP(old, new *egressipv1.Egres continue } if newPodSelector.Matches(podLabels) && !oldPodSelector.Matches(podLabels) { - if err := oc.addPodEgressIPAssignmentsWithLock(name, newEIP.Status.Items, pod); err != nil { + if err := oc.addPodEgressIPAssignmentsWithLock(newEIP.Name, newEIP.Status.Items, pod); err != nil { return err } } @@ -423,6 +266,9 @@ func (oc *DefaultNetworkController) reconcileEgressIP(old, new *egressipv1.Egres return nil } +// reconcileEgressIPNamespace reconciles the database configuration setup in nbdb +// based on received namespace objects. +// NOTE: we only care about namespace label updates func (oc *DefaultNetworkController) reconcileEgressIPNamespace(old, new *v1.Namespace) error { // Same as for reconcileEgressIP: labels play nicely with empty object, not // nil ones. @@ -468,6 +314,9 @@ func (oc *DefaultNetworkController) reconcileEgressIPNamespace(old, new *v1.Name return nil } +// reconcileEgressIPPod reconciles the database configuration setup in nbdb +// based on received pod objects. +// NOTE: we only care about pod label updates func (oc *DefaultNetworkController) reconcileEgressIPPod(old, new *v1.Pod) (err error) { oldPod, newPod := &v1.Pod{}, &v1.Pod{} namespace := &v1.Namespace{} @@ -569,479 +418,7 @@ func (oc *DefaultNetworkController) reconcileEgressIPPod(old, new *v1.Pod) (err return nil } -func (oc *DefaultNetworkController) reconcileCloudPrivateIPConfig(old, new *ocpcloudnetworkapi.CloudPrivateIPConfig) error { - oldCloudPrivateIPConfig, newCloudPrivateIPConfig := &ocpcloudnetworkapi.CloudPrivateIPConfig{}, &ocpcloudnetworkapi.CloudPrivateIPConfig{} - shouldDelete, shouldAdd := false, false - nodeToDelete := "" - - if old != nil { - oldCloudPrivateIPConfig = old - // We need to handle three types of deletes, A) object UPDATE where the - // old egress IP <-> node assignment has been removed. This is indicated - // by the old object having a .status.node set and the new object having - // .status.node empty and the condition on the new being successful. B) - // object UPDATE where egress IP <-> node assignment has been updated. - // This is indicated by .status.node being different on old and new - // objects. 
C) object DELETE, for which new is nil - shouldDelete = oldCloudPrivateIPConfig.Status.Node != "" || new == nil - // On DELETE we need to delete the .spec.node for the old object - nodeToDelete = oldCloudPrivateIPConfig.Spec.Node - } - if new != nil { - newCloudPrivateIPConfig = new - // We should only proceed to setting things up for objects where the new - // object has the same .spec.node and .status.node, and assignment - // condition being true. This is how the cloud-network-config-controller - // indicates a successful cloud assignment. - shouldAdd = newCloudPrivateIPConfig.Status.Node == newCloudPrivateIPConfig.Spec.Node && - ocpcloudnetworkapi.CloudPrivateIPConfigConditionType(newCloudPrivateIPConfig.Status.Conditions[0].Type) == ocpcloudnetworkapi.Assigned && - kapi.ConditionStatus(newCloudPrivateIPConfig.Status.Conditions[0].Status) == kapi.ConditionTrue - // See above explanation for the delete - shouldDelete = shouldDelete && - (newCloudPrivateIPConfig.Status.Node == "" || newCloudPrivateIPConfig.Status.Node != oldCloudPrivateIPConfig.Status.Node) && - ocpcloudnetworkapi.CloudPrivateIPConfigConditionType(newCloudPrivateIPConfig.Status.Conditions[0].Type) == ocpcloudnetworkapi.Assigned && - kapi.ConditionStatus(newCloudPrivateIPConfig.Status.Conditions[0].Status) == kapi.ConditionTrue - // On UPDATE we need to delete the old .status.node - if shouldDelete { - nodeToDelete = oldCloudPrivateIPConfig.Status.Node - } - } - - // As opposed to reconcileEgressIP, here we are only interested in changes - // made to the status (since we are the only ones performing the change made - // to the spec). So don't process the object if there is no change made to - // the status. - if reflect.DeepEqual(oldCloudPrivateIPConfig.Status, newCloudPrivateIPConfig.Status) { - return nil - } - - if shouldDelete { - // Get the EgressIP owner reference - egressIPName, exists := oldCloudPrivateIPConfig.Annotations[util.OVNEgressIPOwnerRefLabel] - if !exists { - // If a CloudPrivateIPConfig object does not have an egress IP owner reference annotation upon deletion, - // there is no way that the object will get one after deletion. Hence, simply log a warning message here - // for informative purposes instead of throwing the same error and retrying time and time again. - klog.Warningf("CloudPrivateIPConfig object %q was missing the egress IP owner reference annotation "+ - "upon deletion", oldCloudPrivateIPConfig.Name) - return nil - } - // Check if the egress IP has been deleted or not, if we are processing - // a CloudPrivateIPConfig delete because the EgressIP has been deleted - // then we need to remove the setup made for it, but not update the - // object. - egressIP, err := oc.kube.GetEgressIP(egressIPName) - isDeleted := apierrors.IsNotFound(err) - if err != nil && !isDeleted { - return err - } - egressIPString := cloudPrivateIPConfigNameToIPString(oldCloudPrivateIPConfig.Name) - statusItem := egressipv1.EgressIPStatusItem{ - Node: nodeToDelete, - EgressIP: egressIPString, - } - // In many cases, this here is likely redundant as we already run this inside - // reconcileEgressIP before instructing the CloudPrivateIP reconciler to delete - // it again. But running oc.deleteEgressIPAssignments twice shouldn't hurt, and - // this is also needed if someone manually deletes the CloudPrivateIP, but keeps - // the EgressIP. Therefore, for safe measure, better delete the flows twice. In - // the future, let's possibly reevaluate if this is needed. 
- if err := oc.deleteEgressIPAssignments(egressIPName, []egressipv1.EgressIPStatusItem{statusItem}); err != nil { - return err - } - // If we are not processing a delete, update the EgressIP object's - // status assignments - if !isDeleted { - // Deleting a status here means updating the object with the statuses we - // want to keep - updatedStatus := []egressipv1.EgressIPStatusItem{} - for _, status := range egressIP.Status.Items { - if !reflect.DeepEqual(status, statusItem) { - updatedStatus = append(updatedStatus, status) - } - } - if err := oc.patchReplaceEgressIPStatus(egressIP.Name, updatedStatus); err != nil { - return err - } - } - resyncEgressIPs, err := oc.removePendingOpsAndGetResyncs(egressIPName, egressIPString) - if err != nil { - return err - } - for _, resyncEgressIP := range resyncEgressIPs { - if err := oc.reconcileEgressIP(nil, &resyncEgressIP); err != nil { - return fmt.Errorf("synthetic update for EgressIP: %s failed, err: %v", egressIP.Name, err) - } - } - } - if shouldAdd { - // Get the EgressIP owner reference - egressIPName, exists := newCloudPrivateIPConfig.Annotations[util.OVNEgressIPOwnerRefLabel] - if !exists { - // If a CloudPrivateIPConfig object does not have an egress IP owner reference annotation upon creation - // then we should simply log this as a warning. We should get an update action later down the road where we - // then take care of the rest. Hence, do not throw an error here to avoid rescheduling. Even though not - // officially supported, think of someone creating a CloudPrivateIPConfig object manually which will never - // get the annotation. - klog.Warningf("CloudPrivateIPConfig object %q is missing the egress IP owner reference annotation. Skipping", - oldCloudPrivateIPConfig.Name) - return nil - } - egressIP, err := oc.kube.GetEgressIP(egressIPName) - if err != nil { - return err - } - egressIPString := cloudPrivateIPConfigNameToIPString(newCloudPrivateIPConfig.Name) - statusItem := egressipv1.EgressIPStatusItem{ - Node: newCloudPrivateIPConfig.Status.Node, - EgressIP: egressIPString, - } - if err := oc.addEgressIPAssignments(egressIP.Name, []egressipv1.EgressIPStatusItem{statusItem}, egressIP.Spec.NamespaceSelector, egressIP.Spec.PodSelector); err != nil { - return err - } - // Guard against performing the same assignment twice, which might - // happen when multiple updates come in on the same object. - hasStatus := false - for _, status := range egressIP.Status.Items { - if reflect.DeepEqual(status, statusItem) { - hasStatus = true - break - } - } - if !hasStatus { - statusToKeep := append(egressIP.Status.Items, statusItem) - if err := oc.patchReplaceEgressIPStatus(egressIP.Name, statusToKeep); err != nil { - return err - } - } - - oc.eIPC.pendingCloudPrivateIPConfigsMutex.Lock() - defer oc.eIPC.pendingCloudPrivateIPConfigsMutex.Unlock() - // Remove the finished add / update operation from the pending cache. We - // never process add and deletes in the same sync, and for updates: - // deletes are always performed before adds, hence we should only ever - // fully delete the item from the pending cache once the add has - // finished. - ops, pending := oc.eIPC.pendingCloudPrivateIPConfigsOps[egressIPName] - if !pending { - // Do not return an error here, it will lead to spurious error - // messages on restart because we will process a bunch of adds for - // all existing objects, for which no CR was issued. 
- klog.V(5).Infof("No pending operation found for EgressIP: %s while processing created CloudPrivateIPConfig", egressIPName) - return nil - } - op, exists := ops[egressIPString] - if !exists { - klog.V(5).Infof("Pending operations found for EgressIP: %s, but not for the created CloudPrivateIPConfig: %s", egressIPName, egressIPString) - return nil - } - // Process finalized add / updates, hence: (op.toAdd != "" && - // op.toDelete != "") || (op.toAdd != "" && op.toDelete == ""), which is - // equivalent the below. - if op.toAdd != "" { - delete(ops, egressIPString) - } - if len(ops) == 0 { - delete(oc.eIPC.pendingCloudPrivateIPConfigsOps, egressIPName) - } - } - return nil -} - -// removePendingOps removes the existing pending CloudPrivateIPConfig operations -// from the cache and returns the EgressIP object which can be re-synced given -// the new assignment possibilities. -func (oc *DefaultNetworkController) removePendingOpsAndGetResyncs(egressIPName, egressIP string) ([]egressipv1.EgressIP, error) { - oc.eIPC.pendingCloudPrivateIPConfigsMutex.Lock() - defer oc.eIPC.pendingCloudPrivateIPConfigsMutex.Unlock() - ops, pending := oc.eIPC.pendingCloudPrivateIPConfigsOps[egressIPName] - if !pending { - return nil, fmt.Errorf("no pending operation found for EgressIP: %s", egressIPName) - } - op, exists := ops[egressIP] - if !exists { - return nil, fmt.Errorf("pending operations found for EgressIP: %s, but not for the finalized IP: %s", egressIPName, egressIP) - } - // Make sure we are dealing with a delete operation, since for update - // operations will still need to process the add afterwards. - if op.toAdd == "" && op.toDelete != "" { - delete(ops, egressIP) - } - if len(ops) == 0 { - delete(oc.eIPC.pendingCloudPrivateIPConfigsOps, egressIPName) - } - - // Some EgressIP objects might not have all of their spec.egressIPs - // assigned because there was no room to assign them. Hence, every time - // we process a final deletion for a CloudPrivateIPConfig: have a look - // at what other EgressIP objects have something un-assigned, and force - // a reconciliation on them by sending a synthetic update. - egressIPs, err := oc.kube.GetEgressIPs() - if err != nil { - return nil, fmt.Errorf("unable to list EgressIPs, err: %v", err) - } - resyncs := make([]egressipv1.EgressIP, 0, len(egressIPs.Items)) - for _, egressIP := range egressIPs.Items { - // Do not process the egress IP object which owns the - // CloudPrivateIPConfig for which we are currently processing the - // deletion for. - if egressIP.Name == egressIPName { - continue - } - unassigned := len(egressIP.Spec.EgressIPs) - len(egressIP.Status.Items) - ops, pending := oc.eIPC.pendingCloudPrivateIPConfigsOps[egressIP.Name] - // If the EgressIP was never added to the pending cache to begin - // with, but has un-assigned egress IPs, try it. - if !pending && unassigned > 0 { - resyncs = append(resyncs, egressIP) - continue - } - // If the EgressIP has pending operations, have a look at if the - // unassigned operations superseed the pending ones. It could be - // that it could only execute a couple of assignments at one point. - if pending && unassigned > len(ops) { - resyncs = append(resyncs, egressIP) - } - } - return resyncs, nil -} - -type cloudPrivateIPConfigOp struct { - toAdd string - toDelete string -} - -// executeCloudPrivateIPConfigChange computes a diff between what needs to be -// assigned/removed and executes the object modification afterwards. 
-type cloudPrivateIPConfigOp struct {
-	toAdd    string
-	toDelete string
-}
-
-// executeCloudPrivateIPConfigChange computes a diff between what needs to be
-// assigned/removed and executes the object modification afterwards.
-// Specifically: if one egress IP is moved from nodeA to nodeB, we actually care
-// about an update on the CloudPrivateIPConfig object represented by that egress
-// IP. cloudPrivateIPConfigOp is a helper used to determine that sort of
-// operation from toAssign/toRemove.
-func (oc *DefaultNetworkController) executeCloudPrivateIPConfigChange(egressIPName string, toAssign, toRemove []egressipv1.EgressIPStatusItem) error {
-	oc.eIPC.pendingCloudPrivateIPConfigsMutex.Lock()
-	defer oc.eIPC.pendingCloudPrivateIPConfigsMutex.Unlock()
-	ops := make(map[string]*cloudPrivateIPConfigOp, len(toAssign)+len(toRemove))
-	for _, assignment := range toAssign {
-		ops[assignment.EgressIP] = &cloudPrivateIPConfigOp{
-			toAdd: assignment.Node,
-		}
-	}
-	for _, removal := range toRemove {
-		if op, exists := ops[removal.EgressIP]; exists {
-			op.toDelete = removal.Node
-		} else {
-			ops[removal.EgressIP] = &cloudPrivateIPConfigOp{
-				toDelete: removal.Node,
-			}
-		}
-	}
-	// Merge ops into the existing pendingCloudPrivateIPConfigsOps.
-	// This allows us to:
-	// a) execute only the new ops
-	// b) keep track of any pending changes
-	if len(ops) > 0 {
-		if _, ok := oc.eIPC.pendingCloudPrivateIPConfigsOps[egressIPName]; !ok {
-			// Set all operations for the EgressIP object if none are in the cache currently.
-			oc.eIPC.pendingCloudPrivateIPConfigsOps[egressIPName] = ops
-		} else {
-			for cloudPrivateIP, op := range ops {
-				if _, ok := oc.eIPC.pendingCloudPrivateIPConfigsOps[egressIPName][cloudPrivateIP]; !ok {
-					// If this specific EgressIP object's CloudPrivateIPConfig address currently has no
-					// op, simply set it.
-					oc.eIPC.pendingCloudPrivateIPConfigsOps[egressIPName][cloudPrivateIP] = op
-				} else {
-					// If an existing operation for this CloudPrivateIP exists, then the following logic
-					// applies:
-					// If toDelete is currently set: keep the current toDelete. Theoretically, the oldest toDelete
-					// is the good one. If toDelete is currently not set, overwrite it with the new value.
-					// If toAdd is currently set: overwrite with the new toAdd. Theoretically, the newest toAdd is
-					// the good one.
-					// Therefore, only replace toAdd over a previously existing op and only replace toDelete if
-					// it's unset.
-					if op.toAdd != "" {
-						oc.eIPC.pendingCloudPrivateIPConfigsOps[egressIPName][cloudPrivateIP].toAdd = op.toAdd
-					}
-					if oc.eIPC.pendingCloudPrivateIPConfigsOps[egressIPName][cloudPrivateIP].toDelete == "" {
-						oc.eIPC.pendingCloudPrivateIPConfigsOps[egressIPName][cloudPrivateIP].toDelete = op.toDelete
-					}
-				}
-			}
-		}
-	}
-	return oc.executeCloudPrivateIPConfigOps(egressIPName, ops)
-}
-
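The merge rule above (newest toAdd wins, oldest toDelete wins) is easiest to see in isolation. The following standalone sketch, with a simplified op type standing in for cloudPrivateIPConfigOp, illustrates the behavior the removed code relied on:

package main

import "fmt"

type op struct{ toAdd, toDelete string }

// mergeOp applies the pending-ops merge rule sketched above: a newer toAdd
// always overwrites, while an already-recorded toDelete is kept.
func mergeOp(existing, incoming *op) {
	if incoming.toAdd != "" {
		existing.toAdd = incoming.toAdd // newest toAdd is the good one
	}
	if existing.toDelete == "" {
		existing.toDelete = incoming.toDelete // oldest toDelete is the good one
	}
}

func main() {
	pending := &op{toAdd: "node1", toDelete: "node0"}
	mergeOp(pending, &op{toAdd: "node2", toDelete: "node1"})
	fmt.Printf("%+v\n", *pending) // {toAdd:node2 toDelete:node0}
}
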
- if op.toAdd != "" && op.toDelete != "" { - if err != nil { - return fmt.Errorf("cloud update request failed for CloudPrivateIPConfig: %s, could not get item, err: %v", cloudPrivateIPConfigName, err) - } - // Do not update if object is being deleted - if !cloudPrivateIPConfig.GetDeletionTimestamp().IsZero() { - return fmt.Errorf("cloud update request failed, CloudPrivateIPConfig: %s is being deleted", cloudPrivateIPConfigName) - } - cloudPrivateIPConfig.Spec.Node = op.toAdd - if _, err := oc.kube.UpdateCloudPrivateIPConfig(cloudPrivateIPConfig); err != nil { - eIPRef := kapi.ObjectReference{ - Kind: "EgressIP", - Name: egressIPName, - } - oc.recorder.Eventf(&eIPRef, kapi.EventTypeWarning, "CloudUpdateFailed", "egress IP: %s for object EgressIP: %s could not be updated, err: %v", egressIP, egressIPName, err) - return fmt.Errorf("cloud update request failed for CloudPrivateIPConfig: %s, err: %v", cloudPrivateIPConfigName, err) - } - // toAdd is non-empty, this indicates an ADD - // if the object already exists for the specified node that's a no-op - // if the object already exists and the request is for a different node, that's an error - } else if op.toAdd != "" { - if err == nil { - if op.toAdd == cloudPrivateIPConfig.Spec.Node { - klog.Infof("CloudPrivateIPConfig: %s already assigned to node: %s", cloudPrivateIPConfigName, cloudPrivateIPConfig.Spec.Node) - continue - } - return fmt.Errorf("cloud create request failed for CloudPrivateIPConfig: %s, err: item exists", cloudPrivateIPConfigName) - } - cloudPrivateIPConfig := ocpcloudnetworkapi.CloudPrivateIPConfig{ - ObjectMeta: metav1.ObjectMeta{ - Name: cloudPrivateIPConfigName, - Annotations: map[string]string{ - util.OVNEgressIPOwnerRefLabel: egressIPName, - }, - }, - Spec: ocpcloudnetworkapi.CloudPrivateIPConfigSpec{ - Node: op.toAdd, - }, - } - if _, err := oc.kube.CreateCloudPrivateIPConfig(&cloudPrivateIPConfig); err != nil { - eIPRef := kapi.ObjectReference{ - Kind: "EgressIP", - Name: egressIPName, - } - oc.recorder.Eventf(&eIPRef, kapi.EventTypeWarning, "CloudAssignmentFailed", "egress IP: %s for object EgressIP: %s could not be created, err: %v", egressIP, egressIPName, err) - return fmt.Errorf("cloud add request failed for CloudPrivateIPConfig: %s, err: %v", cloudPrivateIPConfigName, err) - } - // toDelete is non-empty, this indicates a DELETE - if the object does not exist, log an Info message and continue with the next op. - // The reason for why we are not throwing an error here is that desired state (deleted) == isState (object not found). - // If for whatever reason we have a pending toDelete op for a deleted object, then this op should simply be silently ignored. - // Any other error, return an error to trigger a retry. 
- } else if op.toDelete != "" { - if err != nil { - if apierrors.IsNotFound(err) { - klog.Infof("Cloud deletion request failed for CloudPrivateIPConfig: %s, item already deleted, err: %v", cloudPrivateIPConfigName, err) - continue - } else { - return fmt.Errorf("cloud deletion request failed for CloudPrivateIPConfig: %s, could not get item, err: %v", cloudPrivateIPConfigName, err) - } - } - if err := oc.kube.DeleteCloudPrivateIPConfig(cloudPrivateIPConfigName); err != nil { - eIPRef := kapi.ObjectReference{ - Kind: "EgressIP", - Name: egressIPName, - } - oc.recorder.Eventf(&eIPRef, kapi.EventTypeWarning, "CloudDeletionFailed", "egress IP: %s for object EgressIP: %s could not be deleted, err: %v", egressIP, egressIPName, err) - return fmt.Errorf("cloud deletion request failed for CloudPrivateIPConfig: %s, err: %v", cloudPrivateIPConfigName, err) - } - } - } - return nil -} - -func (oc *DefaultNetworkController) validateEgressIPSpec(name string, egressIPs []string) (sets.Set[string], error) { - validatedEgressIPs := sets.New[string]() - for _, egressIP := range egressIPs { - ip := net.ParseIP(egressIP) - if ip == nil { - eIPRef := kapi.ObjectReference{ - Kind: "EgressIP", - Name: name, - } - oc.recorder.Eventf(&eIPRef, kapi.EventTypeWarning, "InvalidEgressIP", "egress IP: %s for object EgressIP: %s is not a valid IP address", egressIP, name) - return nil, fmt.Errorf("unable to parse provided EgressIP: %s, invalid", egressIP) - } - validatedEgressIPs.Insert(ip.String()) - } - return validatedEgressIPs, nil -} - -// validateEgressIPStatus validates if the statuses are valid given what the -// cache knows about all egress nodes. WatchEgressNodes is initialized before -// any other egress IP handler, so te cache should be warm and correct once we -// start going this. 
-// validateEgressIPStatus validates if the statuses are valid given what the
-// cache knows about all egress nodes. WatchEgressNodes is initialized before
-// any other egress IP handler, so the cache should be warm and correct once we
-// start doing this.
-func (oc *DefaultNetworkController) validateEgressIPStatus(name string, items []egressipv1.EgressIPStatusItem) (map[egressipv1.EgressIPStatusItem]string, map[egressipv1.EgressIPStatusItem]string) {
-	oc.eIPC.allocator.Lock()
-	defer oc.eIPC.allocator.Unlock()
-	valid, invalid := make(map[egressipv1.EgressIPStatusItem]string), make(map[egressipv1.EgressIPStatusItem]string)
-	for _, eIPStatus := range items {
-		validAssignment := true
-		eNode, exists := oc.eIPC.allocator.cache[eIPStatus.Node]
-		if !exists {
-			klog.Errorf("Allocator error: EgressIP: %s claims to have an allocation on a node which is unassignable for egress IP: %s", name, eIPStatus.Node)
-			validAssignment = false
-		} else {
-			if eNode.getAllocationCountForEgressIP(name) > 1 {
-				klog.Errorf("Allocator error: EgressIP: %s claims multiple egress IPs on same node: %s, will attempt rebalancing", name, eIPStatus.Node)
-				validAssignment = false
-			}
-			if !eNode.isEgressAssignable {
-				klog.Errorf("Allocator error: EgressIP: %s assigned to node: %s which does not have egress label, will attempt rebalancing", name, eIPStatus.Node)
-				validAssignment = false
-			}
-			if !eNode.isReachable {
-				klog.Errorf("Allocator error: EgressIP: %s assigned to node: %s which is not reachable, will attempt rebalancing", name, eIPStatus.Node)
-				validAssignment = false
-			}
-			if !eNode.isReady {
-				klog.Errorf("Allocator error: EgressIP: %s assigned to node: %s which is not ready, will attempt rebalancing", name, eIPStatus.Node)
-				validAssignment = false
-			}
-			ip := net.ParseIP(eIPStatus.EgressIP)
-			if ip == nil {
-				klog.Errorf("Allocator error: EgressIP allocation contains unparsable IP address: %s", eIPStatus.EgressIP)
-				validAssignment = false
-			}
-			if node := oc.isAnyClusterNodeIP(ip); node != nil {
-				klog.Errorf("Allocator error: EgressIP allocation: %s is the IP of node: %s ", ip.String(), node.name)
-				validAssignment = false
-			}
-			if utilnet.IsIPv6(ip) && eNode.egressIPConfig.V6.Net != nil {
-				if !eNode.egressIPConfig.V6.Net.Contains(ip) {
-					klog.Errorf("Allocator error: EgressIP allocation: %s on subnet: %s which cannot host it", ip.String(), eNode.egressIPConfig.V4.Net.String())
-					validAssignment = false
-				}
-			} else if !utilnet.IsIPv6(ip) && eNode.egressIPConfig.V4.Net != nil {
-				if !eNode.egressIPConfig.V4.Net.Contains(ip) {
-					klog.Errorf("Allocator error: EgressIP allocation: %s on subnet: %s which cannot host it", ip.String(), eNode.egressIPConfig.V4.Net.String())
-					validAssignment = false
-				}
-			} else {
-				klog.Errorf("Allocator error: EgressIP allocation on node: %s which does not support its IP protocol version", eIPStatus.Node)
-				validAssignment = false
-			}
-		}
-		if validAssignment {
-			valid[eIPStatus] = ""
-		} else {
-			invalid[eIPStatus] = ""
-		}
-	}
-	return valid, invalid
-}
-
-// addAllocatorEgressIPAssignments adds the allocations to the cache, so that
-// they are tracked during the life-cycle of ovnkube-master
-func (oc *DefaultNetworkController) addAllocatorEgressIPAssignments(name string, statusAssignments []egressipv1.EgressIPStatusItem) {
-	oc.eIPC.allocator.Lock()
-	defer oc.eIPC.allocator.Unlock()
-	for _, status := range statusAssignments {
-		if eNode, exists := oc.eIPC.allocator.cache[status.Node]; exists {
-			eNode.allocations[status.EgressIP] = name
-		}
-	}
-}
-
+// main reconcile functions end here and local zone controller functions begin
 
 func (oc *DefaultNetworkController) addEgressIPAssignments(name string, statusAssignments []egressipv1.EgressIPStatusItem, namespaceSelector, podSelector metav1.LabelSelector) error {
 	namespaces, err := oc.watchFactory.GetNamespacesBySelector(namespaceSelector)
@@ -1102,26 +479,54 @@ func (oc *DefaultNetworkController) addPodEgressIPAssignments(name string, statu
 	if len(statusAssignments) == 0 {
 		return nil
 	}
-	var remainingAssignments []egressipv1.EgressIPStatusItem
-	// Retrieve the pod's networking configuration from the
-	// logicalPortCache. The reason for doing this: a) only normal network
-	// pods are placed in this cache, b) once the pod is placed here we know
-	// addLogicalPort has finished successfully setting up networking for
-	// the pod, so we can proceed with retrieving its IP and deleting the
-	// external GW configuration created in addLogicalPort for the pod.
-	logicalPort, err := oc.logicalPortCache.get(pod, types.DefaultNetworkName)
-	if err != nil {
-		return nil
+	// We need to proceed with the add only under two conditions:
+	// 1) an egress node present in at least one status is local to this zone
+	//    (NOTE: the relation between egressIPName and nodeName is 1:1, i.e. in the same object a given node will be present in only one status)
+	// 2) the pod being added is local to this zone
+	proceed := false
+	for _, status := range statusAssignments {
+		oc.eIPC.nodeZoneState.LockKey(status.Node)
+		isLocalZoneEgressNode, loadedEgressNode := oc.eIPC.nodeZoneState.Load(status.Node)
+		if loadedEgressNode && isLocalZoneEgressNode {
+			proceed = true
+			oc.eIPC.nodeZoneState.UnlockKey(status.Node)
+			break
+		}
+		oc.eIPC.nodeZoneState.UnlockKey(status.Node)
 	}
-	// Since the logical switch port cache removes entries only 60 seconds
-	// after deletion, its possible that when pod is recreated with the same name
-	// within the 60seconds timer, stale info gets used to create SNATs and reroutes
-	// for the eip pods. Checking if the expiry is set for the port or not can indicate
-	// if the port is scheduled for deletion.
-	if !logicalPort.expires.IsZero() {
-		klog.Warningf("Stale LSP %s for pod %s found in cache refetching",
-			logicalPort.name, podKey)
-		return nil
+	if !proceed && !oc.isPodScheduledinLocalZone(pod) {
+		return nil // nothing to do if none of the status nodes are local to this master and the pod is also remote
+	}
+	var remainingAssignments []egressipv1.EgressIPStatusItem
+	var podIPs []*net.IPNet
+	var err error
+	if oc.isPodScheduledinLocalZone(pod) {
+		// Retrieve the pod's networking configuration from the
+		// logicalPortCache. The reason for doing this: a) only normal network
+		// pods are placed in this cache, b) once the pod is placed here we know
+		// addLogicalPort has finished successfully setting up networking for
+		// the pod, so we can proceed with retrieving its IP and deleting the
+		// external GW configuration created in addLogicalPort for the pod.
+		logicalPort, err := oc.logicalPortCache.get(pod, types.DefaultNetworkName)
+		if err != nil {
+			return nil
+		}
+		// Since the logical switch port cache removes entries only 60 seconds
+		// after deletion, it's possible that when a pod is recreated with the same name
+		// within that 60-second window, stale info gets used to create SNATs and reroutes
+		// for the eip pods. Checking whether the expiry is set for the port can indicate
+		// if the port is scheduled for deletion.
+		if !logicalPort.expires.IsZero() {
+			klog.Warningf("Stale LSP %s for pod %s found in cache refetching",
+				logicalPort.name, podKey)
+			return nil
+		}
+		podIPs = logicalPort.ips
+	} else { // the pod is remote, but this zone hosts an egress node serving it
+		podIPs, err = util.GetPodCIDRsWithFullMask(pod, oc.NetInfo)
+		if err != nil {
+			return err
+		}
 	}
 	podState, exists := oc.eIPC.podAssignment[podKey]
 	if !exists {
@@ -1160,48 +565,41 @@ func (oc *DefaultNetworkController) addPodEgressIPAssignments(name string, statu
 		return nil
 	}
 	for _, status := range remainingAssignments {
-		klog.V(2).Infof("Adding pod egress IP status: %v for EgressIP: %s and pod: %s/%s", status, name, pod.Name, pod.Namespace)
-		if err := oc.eIPC.addPodEgressIPAssignment(name, status, pod, logicalPort.ips); err != nil {
+		klog.V(2).Infof("Adding pod egress IP status: %v for EgressIP: %s and pod: %s/%s/%v", status, name, pod.Namespace, pod.Name, podIPs)
+		err = oc.eIPC.nodeZoneState.DoWithLock(status.Node, func(key string) error {
+			if status.Node == pod.Spec.NodeName {
+				// we are safe, no need to grab the lock again
+				if err := oc.eIPC.addPodEgressIPAssignment(name, status, pod, podIPs); err != nil {
+					return fmt.Errorf("unable to create egressip configuration for pod %s/%s/%v, err: %w", pod.Namespace, pod.Name, podIPs, err)
+				}
+				podState.egressStatuses[status] = ""
+				return nil
+			}
+			return oc.eIPC.nodeZoneState.DoWithLock(pod.Spec.NodeName, func(key string) error {
+				// we need to grab the lock again for the pod's node
				if err := oc.eIPC.addPodEgressIPAssignment(name, status, pod, podIPs); err != nil {
+					return fmt.Errorf("unable to create egressip configuration for pod %s/%s/%v, err: %w", pod.Namespace, pod.Name, podIPs, err)
+				}
+				podState.egressStatuses[status] = ""
+				return nil
+			})
+		})
+		if err != nil {
 			return err
 		}
-		podState.egressStatuses[status] = ""
 	}
-	// add the podIP to the global egressIP address set
-	addrSetIPs := make([]net.IP, len(logicalPort.ips))
-	for i, podIP := range logicalPort.ips {
-		copyPodIP := *podIP
-		addrSetIPs[i] = copyPodIP.IP
-	}
-	if err := oc.addPodIPsToAddressSet(addrSetIPs); err != nil {
-		return fmt.Errorf("cannot add egressPodIPs for the pod %s/%s to the address set: err: %v", pod.Namespace, pod.Name, err)
-	}
-	return nil
-}
-
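The nodeZoneState helper used above is, in effect, a map guarded by per-key locks, so two handlers serializing on the same node do not block work on unrelated nodes. A minimal sketch of that pattern (the real implementation lives in ovn-kubernetes' syncmap package; the names below are illustrative, not the project's API):

package main

import (
	"fmt"
	"sync"
)

// keyedBools is a toy stand-in for nodeZoneState: one bool per node name,
// with a dedicated mutex per key so unrelated nodes never contend.
type keyedBools struct {
	mu    sync.Mutex
	locks map[string]*sync.Mutex
	vals  map[string]bool
}

func newKeyedBools() *keyedBools {
	return &keyedBools{locks: map[string]*sync.Mutex{}, vals: map[string]bool{}}
}

func (k *keyedBools) lockKey(key string) *sync.Mutex {
	k.mu.Lock()
	l, ok := k.locks[key]
	if !ok {
		l = &sync.Mutex{}
		k.locks[key] = l
	}
	k.mu.Unlock()
	l.Lock()
	return l
}

// DoWithLock mirrors the calling convention used in the hunk above.
func (k *keyedBools) DoWithLock(key string, f func(key string) error) error {
	l := k.lockKey(key)
	defer l.Unlock()
	return f(key)
}

func main() {
	s := newKeyedBools()
	_ = s.DoWithLock("node1", func(key string) error {
		s.vals[key] = true // e.g. node1 is local to this zone
		return nil
	})
	fmt.Println(s.vals["node1"])
}
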
-// deleteAllocatorEgressIPAssignmentIfExists deletes the egressIP config from the node
-// allocations map if the entry is available and returns the assigned node name,
-// otherwise returns an empty string.
-func (oc *DefaultNetworkController) deleteAllocatorEgressIPAssignmentIfExists(name, egressIP string) string {
-	oc.eIPC.allocator.Lock()
-	defer oc.eIPC.allocator.Unlock()
-	for nodeName, eNode := range oc.eIPC.allocator.cache {
-		if egressIPName, exists := eNode.allocations[egressIP]; exists && egressIPName == name {
-			delete(eNode.allocations, egressIP)
-			return nodeName
+	if oc.isPodScheduledinLocalZone(pod) {
+		// add the podIP to the global egressIP address set
+		addrSetIPs := make([]net.IP, len(podIPs))
+		for i, podIP := range podIPs {
+			copyPodIP := *podIP
+			addrSetIPs[i] = copyPodIP.IP
 		}
-	}
-	return ""
-}
-
-// deleteAllocatorEgressIPAssignments deletes the allocations so as to keep the
-// cache state correct, also see addAllocatorEgressIPAssignments
-func (oc *DefaultNetworkController) deleteAllocatorEgressIPAssignments(statusAssignments []egressipv1.EgressIPStatusItem) {
-	oc.eIPC.allocator.Lock()
-	defer oc.eIPC.allocator.Unlock()
-	for _, status := range statusAssignments {
-		if eNode, exists := oc.eIPC.allocator.cache[status.Node]; exists {
-			delete(eNode.allocations, status.EgressIP)
+		if err := oc.addPodIPsToAddressSet(addrSetIPs); err != nil {
+			return fmt.Errorf("cannot add egressPodIPs for the pod %s/%s to the address set: err: %v", pod.Namespace, pod.Name, err)
 		}
 	}
+	return nil
 }
 
 // deleteEgressIPAssignments performs a full egress IP setup deletion on a per
@@ -1215,10 +613,7 @@ func (oc *DefaultNetworkController) deleteEgressIPAssignments(name string, statu
 	var podIPs []net.IP
 	var err error
 	for _, statusToRemove := range statusesToRemove {
-		klog.V(2).Infof("Deleting pod egress IP status: %v for EgressIP: %s", statusToRemove, name)
-		if podIPs, err = oc.eIPC.deleteEgressIPStatusSetup(name, statusToRemove); err != nil {
-			return err
-		}
+		removed := false
 		for podKey, podStatus := range oc.eIPC.podAssignment {
 			if podStatus.egressIPName != name {
 				// we can continue here since this pod was not managed by this EIP object
@@ -1229,18 +624,34 @@ func (oc *DefaultNetworkController) deleteEgressIPAssignments(name string, statu
 				// we can continue here since this pod was not managed by this statusToRemove
 				continue
 			}
-			// this pod was managed by statusToRemove.EgressIP; we need to try and add its SNAT back towards nodeIP
-			podNamespace, podName := getPodNamespaceAndNameFromKey(podKey)
-			if err = oc.eIPC.addExternalGWPodSNAT(podNamespace, podName, statusToRemove); err != nil {
+			err = oc.eIPC.nodeZoneState.DoWithLock(statusToRemove.Node, func(key string) error {
+				// this statusToRemove was managing at least one pod, hence let's tear down the setup for this status
+				if !removed {
+					klog.V(2).Infof("Deleting pod egress IP status: %v for EgressIP: %s", statusToRemove, name)
+					if podIPs, err = oc.eIPC.deleteEgressIPStatusSetup(name, statusToRemove); err != nil {
+						return err
+					}
+					removed = true // we should only tear down once, not per pod, since tear down is based on externalIDs
+				}
+				// this pod was managed by statusToRemove.EgressIP; we need to try and add its SNAT back towards nodeIP
+				podNamespace, podName := getPodNamespaceAndNameFromKey(podKey)
+				if err = oc.eIPC.addExternalGWPodSNAT(podNamespace, podName, statusToRemove); err != nil {
+					return err
+				}
				delete(podStatus.egressStatuses, statusToRemove)
+				return nil
+			})
+			if err != nil {
 				return err
 			}
-			delete(podStatus.egressStatuses, statusToRemove)
 			if len(podStatus.egressStatuses) == 0 && len(podStatus.standbyEgressIPNames) == 0 {
 				// pod could be managed by more than one egressIP,
 				// so remove the podKey from cache only if we are sure
 				// there are no more egressStatuses managing this pod
 				klog.V(5).Infof("Deleting pod key %s from assignment cache", podKey)
 				// delete the podIP from the global egressIP address set since it's no longer managed by egressIPs
+				// NOTE(tssurya): There is no way to infer if the pod was local to this zone or not,
+				// so we try to nuke the IP from the address-set anyway - it will be a no-op for remote pods
 				if err := oc.deletePodIPsFromAddressSet(podIPs); err != nil {
 					return fmt.Errorf("cannot delete egressPodIPs for the pod %s from the address set: err: %v", podKey, err)
 				}
@@ -1303,11 +714,31 @@ func (oc *DefaultNetworkController) deletePodEgressIPAssignments(name string, st
 		return err
 	}
 	for _, statusToRemove := range statusesToRemove {
+		if _, ok := podStatus.egressStatuses[statusToRemove]; !ok {
+			// we can continue here since this pod was not managed by this statusToRemove
+			continue
+		}
 		klog.V(2).Infof("Deleting pod egress IP status: %v for EgressIP: %s and pod: %s/%s", statusToRemove, name, pod.Name, pod.Namespace)
-		if err := oc.eIPC.deletePodEgressIPAssignment(name, statusToRemove, pod, podIPs); err != nil {
+		err = oc.eIPC.nodeZoneState.DoWithLock(statusToRemove.Node, func(key string) error {
+			if statusToRemove.Node == pod.Spec.NodeName {
+				// we are safe, no need to grab the lock again
+				if err := oc.eIPC.deletePodEgressIPAssignment(name, statusToRemove, pod, podIPs); err != nil {
+					return err
+				}
				delete(podStatus.egressStatuses, statusToRemove)
+				return nil
+			}
+			return oc.eIPC.nodeZoneState.DoWithLock(pod.Spec.NodeName, func(key string) error {
+				if err := oc.eIPC.deletePodEgressIPAssignment(name, statusToRemove, pod, podIPs); err != nil {
+					return err
+				}
				delete(podStatus.egressStatuses, statusToRemove)
+				return nil
+			})
+		})
+		if err != nil {
 			return err
 		}
-		delete(podStatus.egressStatuses, statusToRemove)
 	}
 	// Delete the key if there are no more status assignments to keep
 	// for the pod.
@@ -1316,42 +747,37 @@ func (oc *DefaultNetworkController) deletePodEgressIPAssignments(name string, st
 		// so remove the podKey from cache only if we are sure
 		// there are no more egressStatuses managing this pod
 		klog.V(5).Infof("Deleting pod key %s from assignment cache", podKey)
-		// delete the podIP from the global egressIP address set
-		addrSetIPs := make([]net.IP, len(podIPs))
-		for i, podIP := range podIPs {
-			copyPodIP := *podIP
-			addrSetIPs[i] = copyPodIP.IP
-		}
-		if err := oc.deletePodIPsFromAddressSet(addrSetIPs); err != nil {
-			return fmt.Errorf("cannot delete egressPodIPs for the pod %s from the address set: err: %v", podKey, err)
+		if oc.isPodScheduledinLocalZone(pod) {
+			// delete the podIP from the global egressIP address set
+			addrSetIPs := make([]net.IP, len(podIPs))
+			for i, podIP := range podIPs {
+				copyPodIP := *podIP
+				addrSetIPs[i] = copyPodIP.IP
+			}
+			if err := oc.deletePodIPsFromAddressSet(addrSetIPs); err != nil {
+				return fmt.Errorf("cannot delete egressPodIPs for the pod %s from the address set: err: %v", podKey, err)
+			}
 		}
 		delete(oc.eIPC.podAssignment, podKey)
 	}
 	return nil
 }
 
-func (oc *DefaultNetworkController) isEgressNodeReady(egressNode *kapi.Node) bool {
-	for _, condition := range egressNode.Status.Conditions {
-		if condition.Type == v1.NodeReady {
-			return condition.Status == v1.ConditionTrue
-		}
-	}
-	return false
-}
-
-func (oc *DefaultNetworkController) isEgressNodeReachable(egressNode *kapi.Node) bool {
-	oc.eIPC.allocator.Lock()
-	defer oc.eIPC.allocator.Unlock()
-	if eNode, exists := oc.eIPC.allocator.cache[egressNode.Name]; exists {
-		return eNode.isReachable || oc.isReachable(eNode.name, eNode.mgmtIPs, eNode.healthClient)
-	}
-	return false
-}
-
 type egressIPCacheEntry struct {
-	egressPods       map[string]sets.Set[string]
+	// egressLocalPods will contain all the pods that
+	// are local to this zone and are being served by this egressIP
+	// object. This will help sync LRPs & LRSRs.
+	egressLocalPods map[string]sets.Set[string]
+	// egressRemotePods will contain all the remote pods
+	// that are being served by this egressIP object.
+	// This will help sync SNATs.
+	egressRemotePods map[string]sets.Set[string] // will be used only when multizone IC is enabled
 	gatewayRouterIPs sets.Set[string]
 	egressIPs        map[string]string
+	// egressLocalNodes will contain all nodes that are local
+	// to this zone and are serving this egressIP object.
+	// This will help sync SNATs.
+	egressLocalNodes sets.Set[string]
 }
 
 func (oc *DefaultNetworkController) syncEgressIPs(namespaces []interface{}) error {
@@ -1367,6 +793,13 @@ func (oc *DefaultNetworkController) syncEgressIPs(namespaces []interface{}) erro
 	// This function is called when handlers for EgressIPNamespaceType are started
 	// since namespaces is the first object that egressIP feature starts watching
 
+	// update the localZones cache of the eIPC zone controller.
+	// WatchNodes() is called before WatchEgressIPNamespaces() so the oc.localZones cache
+	// will be updated, whereas WatchEgressNodes() is called after WatchEgressIPNamespaces()
+	// and so we must update the cache here to ensure we are not stale.
+	if err := oc.syncLocalNodeZonesCache(); err != nil {
+		return fmt.Errorf("syncLocalNodeZonesCache unable to update the local zones node cache: %v", err)
+	}
 	egressIPCache, err := oc.generateCacheForEgressIP()
 	if err != nil {
 		return fmt.Errorf("syncEgressIPs unable to generate cache for egressip: %v", err)
@@ -1386,6 +819,23 @@ func (oc *DefaultNetworkController) syncEgressIPs(namespaces []interface{}) erro
 	return nil
 }
 
+func (oc *DefaultNetworkController) syncLocalNodeZonesCache() error {
+	nodes, err := oc.watchFactory.GetNodes()
+	if err != nil {
+		return fmt.Errorf("unable to fetch nodes from watch factory: %w", err)
+	}
+	for _, node := range nodes {
+		// NOTE: Even at this stage there can be a race; bnc.zone might be the nodeName
+		// while the node's annotations are not yet set, so it still shows as global.
+		// The EgressNodeType events (which are basically all node updates) should
+		// constantly update this cache as nodes get added, updated and removed.
+		oc.eIPC.nodeZoneState.LockKey(node.Name)
+		oc.eIPC.nodeZoneState.Store(node.Name, oc.isLocalZoneNode(node))
+		oc.eIPC.nodeZoneState.UnlockKey(node.Name)
+	}
+	return nil
+}
+
 func (oc *DefaultNetworkController) syncStaleAddressSetIPs(egressIPCache map[string]egressIPCacheEntry) error {
 	dbIDs := getEgressIPAddrSetDbIDs(EgressIPServedPodsAddrSetName, oc.controllerName)
 	as, err := oc.addressSetFactory.EnsureAddressSet(dbIDs)
@@ -1393,8 +843,10 @@ func (oc *DefaultNetworkController) syncStaleAddressSetIPs(egressIPCache map[str
 		return fmt.Errorf("cannot ensure that addressSet for egressIP pods %s exists %v", EgressIPServedPodsAddrSetName, err)
 	}
 	var allEIPServedPodIPs []net.IP
+	// we only care about local zone pods for the address-set since
+	// traffic from remote pods towards the nodeIP won't even reach this zone
 	for eipName := range egressIPCache {
-		for _, podIPs := range egressIPCache[eipName].egressPods {
+		for _, podIPs := range egressIPCache[eipName].egressLocalPods {
 			for podIP := range podIPs {
 				allEIPServedPodIPs = append(allEIPServedPodIPs, net.ParseIP(podIP))
 			}
@@ -1417,14 +869,31 @@ func (oc *DefaultNetworkController) syncPodAssignmentCache(egressIPCache map[str
 	oc.eIPC.podAssignmentMutex.Lock()
 	defer oc.eIPC.podAssignmentMutex.Unlock()
 	for egressIPName, state := range egressIPCache {
-		p := func(item *nbdb.LogicalRouterPolicy) bool {
+		p1 := func(item *nbdb.LogicalRouterPolicy) bool {
 			return item.Priority == types.EgressIPReroutePriority && item.ExternalIDs["name"] == egressIPName
 		}
-		reRoutePolicies, err := libovsdbops.FindLogicalRouterPoliciesWithPredicate(oc.nbClient, p)
+		reRoutePolicies, err := libovsdbops.FindLogicalRouterPoliciesWithPredicate(oc.nbClient, p1)
 		if err != nil {
 			return err
 		}
-		for podKey, podIPs := range state.egressPods {
+		p2 := func(item *nbdb.LogicalRouterStaticRoute) bool {
+			return item.ExternalIDs["name"] == egressIPName
+		}
+		reRouteStaticRoutes, err := libovsdbops.FindLogicalRouterStaticRoutesWithPredicate(oc.nbClient, p2)
+		if err != nil {
+			return err
+		}
+		// Because of how we do generateCacheForEgressIP, we will only have pods that are
+		// either local to the zone (in which case reRoutePolicies will work) OR pods that are
+		// managed by local egressIP nodes (in which case reRouteStaticRoutes will work)
+		egressPods := make(map[string]sets.Set[string])
+		for podKey, podIPs := range state.egressLocalPods {
+			egressPods[podKey] = podIPs
+		}
+		for podKey, podIPs := range state.egressRemotePods {
+			egressPods[podKey] = podIPs
+		}
+		for podKey, podIPs := range egressPods {
 			podState, ok := oc.eIPC.podAssignment[podKey]
 			if !ok {
 				podState = &podAssignmentState{
@@ -1451,6 +920,14 @@ func (oc *DefaultNetworkController) syncPodAssignmentCache(egressIPCache map[str
 				klog.Infof("EgressIP %s is managing pod %s", egressIPName, podKey)
 			}
 		}
+		for _, lrsr := range reRouteStaticRoutes {
+			logicalIP := lrsr.IPPrefix
+			if podIPs.Has(logicalIP) { // should match for only one egressIP object
+				podState.egressIPName = egressIPName
+				podState.standbyEgressIPNames.Delete(egressIPName)
+				klog.Infof("EgressIP %s is managing pod %s", egressIPName, podKey)
+			}
+		}
 		oc.eIPC.podAssignment[podKey] = podState
 	}
 }
@@ -1475,7 +952,10 @@ func (oc *DefaultNetworkController) syncStaleEgressReroutePolicy(egressIPCache m
 		parsedLogicalIP := net.ParseIP(logicalIP)
 		egressPodIPs := sets.NewString()
 		if exists {
-			for _, podIPs := range cacheEntry.egressPods {
+			// Since LRPs are created only for pods local to this zone,
+			// we need to care only about those pods. The nexthop for them will
+			// be either the transit switch IP or the join switch IP.
+			for _, podIPs := range cacheEntry.egressLocalPods {
 				egressPodIPs.Insert(podIPs.UnsortedList()...)
 			}
 		}
@@ -1531,7 +1011,12 @@ func (oc *DefaultNetworkController) syncStaleSNATRules(egressIPCache map[string]
 		cacheEntry, exists := egressIPCache[egressIPName]
 		egressPodIPs := sets.NewString()
 		if exists {
-			for _, podIPs := range cacheEntry.egressPods {
+			// since SNATs can be present either if status.Node was local to
+			// the zone or pods were local to the zone, we need to check both
+			for _, podIPs := range cacheEntry.egressLocalPods {
+				egressPodIPs.Insert(podIPs.UnsortedList()...)
+			}
+			for _, podIPs := range cacheEntry.egressRemotePods {
 				egressPodIPs.Insert(podIPs.UnsortedList()...)
 			}
 		}
@@ -1539,7 +1024,7 @@ func (oc *DefaultNetworkController) syncStaleSNATRules(egressIPCache map[string]
 			klog.Infof("syncStaleSNATRules will delete %s due to logical ip: %v", egressIPName, item)
 			return true
 		}
-		if node, ok := cacheEntry.egressIPs[item.ExternalIP]; !ok ||
+		if node, ok := cacheEntry.egressIPs[item.ExternalIP]; !ok || !cacheEntry.egressLocalNodes.Has(node) ||
 			item.LogicalPort == nil || *item.LogicalPort != types.K8sPrefix+node {
 			klog.Infof("syncStaleSNATRules will delete %s due to external ip or stale logical port: %v", egressIPName, item)
 			return true
@@ -1614,18 +1099,32 @@ func (oc *DefaultNetworkController) generateCacheForEgressIP() (map[string]egres
 	}
 	for _, egressIP := range egressIPs {
 		egressIPCache[egressIP.Name] = egressIPCacheEntry{
-			egressPods:       make(map[string]sets.Set[string]),
-			gatewayRouterIPs: sets.New[string](),
+			egressLocalPods:  make(map[string]sets.Set[string]),
+			egressRemotePods: make(map[string]sets.Set[string]),
+			gatewayRouterIPs: sets.New[string](), // can be transit switch IPs for an interconnect multizone setup
 			egressIPs:        map[string]string{},
+			egressLocalNodes: sets.New[string](),
 		}
 		for _, status := range egressIP.Status.Items {
+			var nextHopIP string
 			isEgressIPv6 := utilnet.IsIPv6String(status.EgressIP)
-			gatewayRouterIP, err := oc.eIPC.getGatewayRouterJoinIP(status.Node, isEgressIPv6)
-			if err != nil {
-				klog.Errorf("Unable to retrieve gateway IP for node: %s, protocol is IPv6: %v, err: %v", status.Node, isEgressIPv6, err)
-				continue
+			_, isLocalZoneEgressNode := oc.localZoneNodes.Load(status.Node)
+			if isLocalZoneEgressNode {
+				gatewayRouterIP, err := oc.eIPC.getGatewayRouterJoinIP(status.Node, isEgressIPv6)
+				if err != nil {
+					klog.Errorf("Unable to retrieve gateway IP for node: %s, protocol is IPv6: %v, err: %v", status.Node, isEgressIPv6, err)
+					continue
+				}
+				nextHopIP = gatewayRouterIP.String()
+				egressIPCache[egressIP.Name].egressLocalNodes.Insert(status.Node)
+			} else {
+				nextHopIP, err = oc.eIPC.getTransitIP(status.Node, isEgressIPv6)
+				if err != nil {
+					klog.Errorf("Unable to fetch transit switch IP for node %s: %v", status.Node, err)
+					continue
+				}
 			}
-			egressIPCache[egressIP.Name].gatewayRouterIPs.Insert(gatewayRouterIP.String())
+			egressIPCache[egressIP.Name].gatewayRouterIPs.Insert(nextHopIP)
 			egressIPCache[egressIP.Name].egressIPs[status.EgressIP] = status.Node
 		}
 		namespaces, err := oc.watchFactory.GetNamespacesBySelector(egressIP.Spec.NamespaceSelector)
@@ -1643,6 +1142,9 @@ func (oc *DefaultNetworkController) generateCacheForEgressIP() (map[string]egres
 			if util.PodCompleted(pod) {
 				continue
 			}
+			if len(egressIPCache[egressIP.Name].egressLocalNodes) == 0 && !oc.isPodScheduledinLocalZone(pod) {
+				continue // don't process anything on masters that have nothing to do with this pod
+			}
 			// FIXME(trozet): potential race where pod is not yet added in the cache by the pod handler
 			logicalPort, err := oc.logicalPortCache.get(pod, types.DefaultNetworkName)
 			if err != nil {
@@ -1650,496 +1152,117 @@ func (oc *DefaultNetworkController) generateCacheForEgressIP() (map[string]egres
 				continue
 			}
 			podKey := getPodKey(pod)
-			_, ok := egressIPCache[egressIP.Name].egressPods[podKey]
-			if !ok {
-				egressIPCache[egressIP.Name].egressPods[podKey] = sets.New[string]()
-			}
-			for _, ipNet := range logicalPort.ips {
-				egressIPCache[egressIP.Name].egressPods[podKey].Insert(ipNet.IP.String())
-			}
-		}
-	}
-
-	return egressIPCache, nil
-}
-
-// isAnyClusterNodeIP verifies that the IP is not any node IP.
-func (oc *DefaultNetworkController) isAnyClusterNodeIP(ip net.IP) *egressNode {
-	for _, eNode := range oc.eIPC.allocator.cache {
-		if ip.Equal(eNode.egressIPConfig.V6.IP) || ip.Equal(eNode.egressIPConfig.V4.IP) {
-			return eNode
-		}
-	}
-	return nil
-}
-
-type EgressIPPatchStatus struct {
-	Op    string                    `json:"op"`
-	Path  string                    `json:"path"`
-	Value egressipv1.EgressIPStatus `json:"value"`
-}
-
-// patchReplaceEgressIPStatus performs a replace patch operation of the egress
-// IP status by replacing the status with the provided value. This allows us to
-// update only the status field, without overwriting any other. This is
-// important because processing egress IPs can take a while (when running on a
-// public cloud and in the worst case), hence we don't want to perform a full
-// object update which risks resetting the EgressIP object's fields to the state
-// they had when we started processing the change.
-func (oc *DefaultNetworkController) patchReplaceEgressIPStatus(name string, statusItems []egressipv1.EgressIPStatusItem) error {
-	klog.Infof("Patching status on EgressIP %s: %v", name, statusItems)
-	return retry.RetryOnConflict(retry.DefaultRetry, func() error {
-		t := []EgressIPPatchStatus{
-			{
-				Op:   "replace",
-				Path: "/status",
-				Value: egressipv1.EgressIPStatus{
-					Items: statusItems,
-				},
-			},
-		}
-		op, err := json.Marshal(&t)
-		if err != nil {
-			return fmt.Errorf("error serializing status patch operation: %+v, err: %v", statusItems, err)
-		}
-		return oc.kube.PatchEgressIP(name, op)
-	})
-}
-
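Stepping back to the nextHopIP selection added in generateCacheForEgressIP above: it is the crux of the multi-zone change. Egress nodes in the local zone are reached via their gateway router's join IP; remote egress nodes via the transit switch IP. A hedged sketch of that selection, with getJoinIP/getTransitIP standing in for the controller's helpers and illustrative addresses:

package main

import "fmt"

// nextHop mirrors the selection above: local egress nodes are reached through
// the gateway router join IP, remote ones through the transit switch IP.
func nextHop(node string, localZoneNodes map[string]bool,
	getJoinIP, getTransitIP func(node string) (string, error)) (string, error) {
	if localZoneNodes[node] {
		return getJoinIP(node)
	}
	return getTransitIP(node)
}

func main() {
	local := map[string]bool{"node1": true}
	join := func(n string) (string, error) { return "100.64.0.2", nil }    // example join IP
	transit := func(n string) (string, error) { return "100.88.0.3", nil } // example transit IP
	hop1, _ := nextHop("node1", local, join, transit)
	hop2, _ := nextHop("node2", local, join, transit)
	fmt.Println(hop1, hop2) // 100.64.0.2 100.88.0.3
}
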
-// assignEgressIPs is the main assignment algorithm for egress IPs to nodes.
-// Specifically we have a couple of hard constraints: a) the subnet of the node
-// must be able to host the egress IP, b) the egress IP cannot be a node IP, c)
-// the IP cannot already be assigned and referenced by another EgressIP object, d)
-// no two egress IPs for the same EgressIP object can be assigned to the same
-// node, e) (for public clouds) the amount of egress IPs assigned to one node
-// must respect its assignment capacity. Moreover there is a soft constraint:
-// the assignments need to be balanced across all cluster nodes, so that no node
-// becomes a bottleneck. The balancing is achieved by sorting the nodes in
-// ascending order following their existing amount of allocations, and trying to
-// assign the egress IP to the node with the lowest amount of allocations every
-// time; this does not guarantee complete balance, but mostly complete.
-func (oc *DefaultNetworkController) assignEgressIPs(name string, egressIPs []string) []egressipv1.EgressIPStatusItem {
-	oc.eIPC.allocator.Lock()
-	defer oc.eIPC.allocator.Unlock()
-	assignments := []egressipv1.EgressIPStatusItem{}
-	assignableNodes, existingAllocations := oc.getSortedEgressData()
-	if len(assignableNodes) == 0 {
-		eIPRef := kapi.ObjectReference{
-			Kind: "EgressIP",
-			Name: name,
-		}
-		oc.recorder.Eventf(&eIPRef, kapi.EventTypeWarning, "NoMatchingNodeFound", "no assignable nodes for EgressIP: %s, please tag at least one node with label: %s", name, util.GetNodeEgressLabel())
-		klog.Errorf("No assignable nodes found for EgressIP: %s and requested IPs: %v", name, egressIPs)
-		return assignments
-	}
-	klog.V(5).Infof("Current assignments are: %+v", existingAllocations)
-	for _, egressIP := range egressIPs {
-		klog.V(5).Infof("Will attempt assignment for egress IP: %s", egressIP)
-		eIPC := net.ParseIP(egressIP)
-		if status, exists := existingAllocations[eIPC.String()]; exists {
-			// On public clouds we will re-process assignments for the same IP
-			// multiple times due to the nature of syncing each individual
-			// CloudPrivateIPConfig one at a time. This means that we are
-			// expected to end up in this situation multiple times per sync. Ex:
-			// say an EgressIP is created with IP1, IP2, IP3. We begin by
-			// assigning them all the first round. Next we get the
-			// CloudPrivateIPConfig confirming the addition of IP1, leading us
-			// to re-assign IP2, IP3, but since we've already assigned them
-			// we'll end up here. This is not an error. What would be an error
-			// is if the user created EIP1 with IP1 and a second EIP2 with IP1
-			if name == status.Name {
-				// IP is already assigned for this EgressIP object
-				assignments = append(assignments, egressipv1.EgressIPStatusItem{
-					Node:     status.Node,
-					EgressIP: eIPC.String(),
-				})
-				continue
-			} else {
-				klog.Errorf("IP: %q for EgressIP: %s is already allocated for EgressIP: %s on %s", egressIP, name, status.Name, status.Node)
-				return assignments
-			}
-		}
-		if node := oc.isAnyClusterNodeIP(eIPC); node != nil {
-			eIPRef := kapi.ObjectReference{
-				Kind: "EgressIP",
-				Name: name,
-			}
-			oc.recorder.Eventf(
-				&eIPRef,
-				kapi.EventTypeWarning,
-				"UnsupportedRequest",
-				"Egress IP: %v for object EgressIP: %s is the IP address of node: %s, this is unsupported", eIPC, name, node.name,
-			)
-			klog.Errorf("Egress IP: %v is the IP address of node: %s", eIPC, node.name)
-			return assignments
-		}
-		for _, eNode := range assignableNodes {
-			klog.V(5).Infof("Attempting assignment on egress node: %+v", eNode)
-			if eNode.getAllocationCountForEgressIP(name) > 0 {
-				klog.V(5).Infof("Node: %s is already in use by another egress IP for this EgressIP: %s, trying another node", eNode.name, name)
-				continue
-			}
-			if eNode.egressIPConfig.Capacity.IP < util.UnlimitedNodeCapacity {
-				if eNode.egressIPConfig.Capacity.IP-len(eNode.allocations) <= 0 {
-					klog.V(5).Infof("Additional allocation on Node: %s exhausts its IP capacity, trying another node", eNode.name)
-					continue
-				}
-			}
-			if eNode.egressIPConfig.Capacity.IPv4 < util.UnlimitedNodeCapacity && utilnet.IsIPv4(eIPC) {
-				if eNode.egressIPConfig.Capacity.IPv4-getIPFamilyAllocationCount(eNode.allocations, false) <= 0 {
-					klog.V(5).Infof("Additional allocation on Node: %s exhausts its IPv4 capacity, trying another node", eNode.name)
-					continue
-				}
-			}
-			if eNode.egressIPConfig.Capacity.IPv6 < util.UnlimitedNodeCapacity && utilnet.IsIPv6(eIPC) {
-				if eNode.egressIPConfig.Capacity.IPv6-getIPFamilyAllocationCount(eNode.allocations, true) <= 0 {
-					klog.V(5).Infof("Additional allocation on Node: %s exhausts its IPv6 capacity, trying another node", eNode.name)
-					continue
+			if oc.isPodScheduledinLocalZone(pod) {
+				_, ok := egressIPCache[egressIP.Name].egressLocalPods[podKey]
+				if !ok {
+					egressIPCache[egressIP.Name].egressLocalPods[podKey] = sets.New[string]()
+				}
+				for _, ipNet := range logicalPort.ips {
+					egressIPCache[egressIP.Name].egressLocalPods[podKey].Insert(ipNet.IP.String())
+				}
+			} else if len(egressIPCache[egressIP.Name].egressLocalNodes) > 0 {
+				// this controller has at least one egress node in the local zone, but the matched pod is remote
+				_, ok := egressIPCache[egressIP.Name].egressRemotePods[podKey]
+				if !ok {
+					egressIPCache[egressIP.Name].egressRemotePods[podKey] = sets.New[string]()
+				}
+				for _, ipNet := range logicalPort.ips {
+					egressIPCache[egressIP.Name].egressRemotePods[podKey].Insert(ipNet.IP.String())
+				}
 			}
 		}
 	}
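The local/remote split implemented just above determines which OVN constructs each zone's controller later syncs: LRPs/LRSRs for local pods, SNATs for remote pods served by local egress nodes. Reduced to its skeleton, with plain maps standing in for the cache types (a sketch, not the controller's code):

package main

import "fmt"

type cacheEntry struct {
	egressLocalPods  map[string][]string
	egressRemotePods map[string][]string
	egressLocalNodes map[string]bool
}

// classify mirrors the branch above: local pods always get recorded; remote
// pods matter only when this zone owns at least one of the egress nodes.
func classify(entry *cacheEntry, podKey string, podIPs []string, podIsLocal bool) {
	if podIsLocal {
		entry.egressLocalPods[podKey] = podIPs
	} else if len(entry.egressLocalNodes) > 0 {
		entry.egressRemotePods[podKey] = podIPs
	}
}

func main() {
	entry := &cacheEntry{
		egressLocalPods:  map[string][]string{},
		egressRemotePods: map[string][]string{},
		egressLocalNodes: map[string]bool{"node1": true},
	}
	classify(entry, "ns/pod-a", []string{"10.244.0.5"}, true)
	classify(entry, "ns/pod-b", []string{"10.244.1.7"}, false)
	fmt.Println(len(entry.egressLocalPods), len(entry.egressRemotePods)) // 1 1
}
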
-			if (eNode.egressIPConfig.V6.Net != nil && eNode.egressIPConfig.V6.Net.Contains(eIPC)) ||
-				(eNode.egressIPConfig.V4.Net != nil && eNode.egressIPConfig.V4.Net.Contains(eIPC)) {
-				assignments = append(assignments, egressipv1.EgressIPStatusItem{
-					Node:     eNode.name,
-					EgressIP: eIPC.String(),
-				})
-				klog.Infof("Successful assignment of egress IP: %s on node: %+v", egressIP, eNode)
-				eNode.allocations[eIPC.String()] = name
-				break
-			}
-		}
-	}
-	if len(assignments) == 0 {
-		eIPRef := kapi.ObjectReference{
-			Kind: "EgressIP",
-			Name: name,
-		}
-		oc.recorder.Eventf(&eIPRef, kapi.EventTypeWarning, "NoMatchingNodeFound", "No matching nodes found, which can host any of the egress IPs: %v for object EgressIP: %s", egressIPs, name)
-		klog.Errorf("No matching host found for EgressIP: %s", name)
-		return assignments
-	}
-	if len(assignments) < len(egressIPs) {
-		eIPRef := kapi.ObjectReference{
-			Kind: "EgressIP",
-			Name: name,
-		}
-		oc.recorder.Eventf(&eIPRef, kapi.EventTypeWarning, "UnassignedRequest", "Not all egress IPs for EgressIP: %s could be assigned, please tag more nodes", name)
-	}
-	return assignments
-}
-
-func getIPFamilyAllocationCount(allocations map[string]string, isIPv6 bool) (count int) {
-	for allocation := range allocations {
-		if utilnet.IsIPv4String(allocation) && !isIPv6 {
-			count++
-		}
-		if utilnet.IsIPv6String(allocation) && isIPv6 {
-			count++
-		}
-	}
-	return
-}
-
-type egressIPNodeStatus struct {
-	Node string
-	Name string
-}
-
-// getSortedEgressData returns a sorted slice of all egressNodes based on the
-// amount of allocations found in the cache
-func (oc *DefaultNetworkController) getSortedEgressData() ([]*egressNode, map[string]egressIPNodeStatus) {
-	assignableNodes := []*egressNode{}
-	allAllocations := make(map[string]egressIPNodeStatus)
-	for _, eNode := range oc.eIPC.allocator.cache {
-		if eNode.isEgressAssignable && eNode.isReady && eNode.isReachable {
-			assignableNodes = append(assignableNodes, eNode)
-		}
-		for ip, eipName := range eNode.allocations {
-			allAllocations[ip] = egressIPNodeStatus{Node: eNode.name, Name: eipName}
-		}
-	}
-	sort.Slice(assignableNodes, func(i, j int) bool {
-		return len(assignableNodes[i].allocations) < len(assignableNodes[j].allocations)
-	})
-	return assignableNodes, allAllocations
-}
-
-func (oc *DefaultNetworkController) setNodeEgressAssignable(nodeName string, isAssignable bool) {
-	oc.eIPC.allocator.Lock()
-	defer oc.eIPC.allocator.Unlock()
-	if eNode, exists := oc.eIPC.allocator.cache[nodeName]; exists {
-		eNode.isEgressAssignable = isAssignable
-		// if the node is not assignable/ready/reachable anymore we need to
-		// empty all of its allocations from our cache since we'll clear all
-		// assignments from this node later on, because of this.
-		if !isAssignable {
-			eNode.allocations = make(map[string]string)
-		}
-	}
-}
-
-func (oc *DefaultNetworkController) setNodeEgressReady(nodeName string, isReady bool) {
-	oc.eIPC.allocator.Lock()
-	defer oc.eIPC.allocator.Unlock()
-	if eNode, exists := oc.eIPC.allocator.cache[nodeName]; exists {
-		eNode.isReady = isReady
-		// see setNodeEgressAssignable
-		if !isReady {
-			eNode.allocations = make(map[string]string)
-		}
-	}
-}
-
-func (oc *DefaultNetworkController) setNodeEgressReachable(nodeName string, isReachable bool) {
-	oc.eIPC.allocator.Lock()
-	defer oc.eIPC.allocator.Unlock()
-	if eNode, exists := oc.eIPC.allocator.cache[nodeName]; exists {
-		eNode.isReachable = isReachable
-		// see setNodeEgressAssignable
-		if !isReachable {
-			eNode.allocations = make(map[string]string)
-		}
-	}
-}
-
-func (oc *DefaultNetworkController) addEgressNode(nodeName string) error {
-	var errors []error
-	klog.V(5).Infof("Egress node: %s about to be initialized", nodeName)
-	// This option will program OVN to start sending GARPs for all external IPs
-	// that the logical switch port has been configured to use. This is
-	// necessary for egress IP because if an egress IP is moved between two
-	// nodes, the nodes need to actively update the ARP cache of all neighbors
-	// so as to notify them of the change. If this is not the case: packets will
-	// continue to be routed to the old node which hosted the egress IP before
-	// it was moved, and the connections will fail.
-	portName := types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + nodeName
-	lsp := nbdb.LogicalSwitchPort{
-		Name: portName,
-		// Setting nat-addresses to router will send out GARPs for all externalIPs and LB VIPs
-		// hosted on the GR. Setting exclude-lb-vips-from-garp to true will make sure GARPs for
-		// LB VIPs are not sent, thereby preventing GARP overload.
-		Options: map[string]string{"nat-addresses": "router", "exclude-lb-vips-from-garp": "true"},
-	}
-	err := libovsdbops.UpdateLogicalSwitchPortSetOptions(oc.nbClient, &lsp)
-	if err != nil {
-		errors = append(errors, fmt.Errorf("unable to configure GARP on external logical switch port for egress node: %s, "+
-			"this will result in packet drops during egress IP re-assignment, err: %v", nodeName, err))
-	}
-
-	// If a node has been labelled for egress IP we need to check if there are any
-	// egress IPs which are missing an assignment. If there are, we need to send a
-	// synthetic update since reconcileEgressIP will then try to assign those IPs to
-	// this node (if possible)
-	egressIPs, err := oc.kube.GetEgressIPs()
-	if err != nil {
-		return fmt.Errorf("unable to list EgressIPs, err: %v", err)
-	}
-	for _, egressIP := range egressIPs.Items {
-		if len(egressIP.Spec.EgressIPs) != len(egressIP.Status.Items) {
-			// Send a "synthetic update" on all egress IPs which are not fully
-			// assigned, the reconciliation loop for WatchEgressIP will try to
-			// assign stuff to this new node. The workqueue's delta FIFO
-			// implementation will not trigger a watch event for updates on
-			// objects which have no semantic difference, hence: call the
-			// reconciliation function directly.
-			if err := oc.reconcileEgressIP(nil, &egressIP); err != nil {
-				errors = append(errors, fmt.Errorf("synthetic update for EgressIP: %s failed, err: %v", egressIP.Name, err))
-			}
 		}
 	}
-	if len(errors) > 0 {
-		return utilerrors.NewAggregate(errors)
-	}
-	return nil
+	return egressIPCache, nil
 }
 
-func (oc *DefaultNetworkController) deleteEgressNode(nodeName string) error {
-	var errorAggregate []error
-	klog.V(5).Infof("Egress node: %s about to be removed", nodeName)
-	// This will remove the option described in addEgressNode from the logical
-	// switch port, since this node will not be used for egress IP assignments
-	// from now on.
-	portName := types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + nodeName
-	lsp := nbdb.LogicalSwitchPort{
-		Name:    portName,
-		Options: map[string]string{"nat-addresses": "", "exclude-lb-vips-from-garp": ""},
-	}
-	err := libovsdbops.UpdateLogicalSwitchPortSetOptions(oc.nbClient, &lsp)
-	if errors.Is(err, libovsdbclient.ErrNotFound) {
-		// if the LSP setup is already gone, then don't count it as an error.
-		klog.Warningf("Unable to remove GARP configuration on external logical switch port for egress node: %s, err: %v", nodeName, err)
-	} else if err != nil {
-		errorAggregate = append(errorAggregate, fmt.Errorf("unable to remove GARP configuration on external logical switch port for egress node: %s, err: %v", nodeName, err))
-	}
-
-	// Since the node has been labelled as "not usable" for egress IP
-	// assignments we need to find all egress IPs which have an assignment to
-	// it, and move them elsewhere.
-	egressIPs, err := oc.kube.GetEgressIPs()
-	if err != nil {
-		return fmt.Errorf("unable to list EgressIPs, err: %v", err)
-	}
-	for _, egressIP := range egressIPs.Items {
-		for _, status := range egressIP.Status.Items {
-			if status.Node == nodeName {
-				// Send a "synthetic update" on all egress IPs which have an
-				// assignment to this node. The reconciliation loop for
-				// WatchEgressIP will see that the current assignment status to
-				// this node is invalid and try to re-assign elsewhere. The
-				// workqueue's delta FIFO implementation will not trigger a
-				// watch event for updates on objects which have no semantic
-				// difference, hence: call the reconciliation function directly.
-				if err := oc.reconcileEgressIP(nil, &egressIP); err != nil {
-					errorAggregate = append(errorAggregate, fmt.Errorf("Re-assignment for EgressIP: %s failed, unable to update object, err: %v", egressIP.Name, err))
-				}
-				break
-			}
-		}
-	}
-	if len(errorAggregate) > 0 {
-		return utilerrors.NewAggregate(errorAggregate)
-	}
-	return nil
+type EgressIPPatchStatus struct {
+	Op    string                    `json:"op"`
+	Path  string                    `json:"path"`
+	Value egressipv1.EgressIPStatus `json:"value"`
 }
-func (oc *DefaultNetworkController) initEgressIPAllocator(node *kapi.Node) (err error) {
-	oc.eIPC.allocator.Lock()
-	defer oc.eIPC.allocator.Unlock()
-	if _, exists := oc.eIPC.allocator.cache[node.Name]; !exists {
-		var parsedEgressIPConfig *util.ParsedNodeEgressIPConfiguration
-		if util.PlatformTypeIsEgressIPCloudProvider() {
-			parsedEgressIPConfig, err = util.ParseCloudEgressIPConfig(node)
-			if err != nil {
-				return fmt.Errorf("unable to use cloud node for egress assignment, err: %v", err)
-			}
-		} else {
-			parsedEgressIPConfig, err = util.ParseNodePrimaryIfAddr(node)
-			if err != nil {
-				return fmt.Errorf("unable to use node for egress assignment, err: %v", err)
-			}
+// patchReplaceEgressIPStatus performs a replace patch operation of the egress
+// IP status by replacing the status with the provided value. This allows us to
+// update only the status field, without overwriting any other. This is
+// important because processing egress IPs can take a while (when running on a
+// public cloud and in the worst case), hence we don't want to perform a full
+// object update which risks resetting the EgressIP object's fields to the state
+// they had when we started processing the change.
+// used for UNIT TESTING only
+func (oc *DefaultNetworkController) patchReplaceEgressIPStatus(name string, statusItems []egressipv1.EgressIPStatusItem) error {
+	klog.Infof("Patching status on EgressIP %s: %v", name, statusItems)
+	return retry.RetryOnConflict(retry.DefaultRetry, func() error {
+		t := []EgressIPPatchStatus{
+			{
+				Op:   "replace",
+				Path: "/status",
+				Value: egressipv1.EgressIPStatus{
+					Items: statusItems,
+				},
+			},
 		}
-		nodeSubnets, err := util.ParseNodeHostSubnetAnnotation(node, types.DefaultNetworkName)
 		op, err := json.Marshal(&t)
 		if err != nil {
-			return fmt.Errorf("failed to parse node %s subnets annotation %v", node.Name, err)
-		}
-		mgmtIPs := make([]net.IP, len(nodeSubnets))
-		for i, subnet := range nodeSubnets {
-			mgmtIPs[i] = util.GetNodeManagementIfAddr(subnet).IP
-		}
-		oc.eIPC.allocator.cache[node.Name] = &egressNode{
-			name:           node.Name,
-			egressIPConfig: parsedEgressIPConfig,
-			mgmtIPs:        mgmtIPs,
-			allocations:    make(map[string]string),
-			healthClient:   hccAllocator.allocate(node.Name),
+			return fmt.Errorf("error serializing status patch operation: %+v, err: %v", statusItems, err)
 		}
-	}
-	return nil
+		return oc.kube.PatchEgressIP(name, op)
+	})
 }
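For reference, the body that RetryOnConflict marshals above is a standard JSON-patch "replace" on /status. A standalone sketch of what goes over the wire (the node name and IP are illustrative only):

package main

import (
	"encoding/json"
	"fmt"
)

type patchOp struct {
	Op    string      `json:"op"`
	Path  string      `json:"path"`
	Value interface{} `json:"value"`
}

func main() {
	t := []patchOp{{
		Op:   "replace",
		Path: "/status",
		Value: map[string]interface{}{
			"items": []map[string]string{{"node": "node1", "egressIP": "10.10.10.10"}},
		},
	}}
	b, _ := json.Marshal(&t)
	fmt.Println(string(b))
	// [{"op":"replace","path":"/status","value":{"items":[{"egressIP":"10.10.10.10","node":"node1"}]}}]
}
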
- if !oldHadEgressLabel && !newHasEgressLabel { +func (oc *DefaultNetworkController) addEgressNode(node *v1.Node) error { + if node == nil { return nil } - - if oldHadEgressLabel && !newHasEgressLabel { - klog.Infof("Node: %s has been un-labeled, deleting it from egress assignment", nodeName) - return oc.deleteEgressNode(nodeName) - } - - if !oldHadEgressLabel && newHasEgressLabel { - klog.Infof("Node: %s has been labeled, adding it for egress assignment", nodeName) - if isNewReady && isNewReachable { - oc.setNodeEgressReachable(nodeName, isNewReachable) - if err := oc.addEgressNode(nodeName); err != nil { - return err - } - } else { - klog.Warningf("Node: %s has been labeled, but node is not ready"+ - " and reachable, cannot use it for egress assignment", nodeName) + if oc.isLocalZoneNode(node) { + klog.V(5).Infof("Egress node: %s about to be initialized", node.Name) + // This option will program OVN to start sending GARPs for all external IPs + // that the logical switch port has been configured to use. This is + // necessary for egress IP because if an egress IP is moved between two + // nodes, the nodes need to actively update the ARP cache of all neighbors + // to notify them of the change. If this is not done, packets will + // continue to be routed to the old node which hosted the egress IP before + // it was moved, and the connections will fail. + portName := types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name + lsp := nbdb.LogicalSwitchPort{ + Name: portName, + // Setting nat-addresses to router will send out GARPs for all externalIPs and LB VIPs + // hosted on the GR. Setting exclude-lb-vips-from-garp to true will make sure GARPs for + // LB VIPs are not sent, thereby preventing GARP overload. + Options: map[string]string{"nat-addresses": "router", "exclude-lb-vips-from-garp": "true"}, + } + err := libovsdbops.UpdateLogicalSwitchPortSetOptions(oc.nbClient, &lsp) + if err != nil { + return fmt.Errorf("unable to configure GARP on external logical switch port for egress node: %s, "+ + "this will result in packet drops during egress IP re-assignment, err: %v", node.Name, err) } - return nil } + return nil +} - if isOldReady == isNewReady { +func (oc *DefaultNetworkController) deleteEgressNode(node *v1.Node) error { + if node == nil { return nil } - - if !isNewReady { - klog.Warningf("Node: %s is not ready, deleting it from egress assignment", nodeName) - if err := oc.deleteEgressNode(nodeName); err != nil { - return err + if oc.isLocalZoneNode(node) { + klog.V(5).Infof("Egress node: %s about to be removed", node.Name) + // This will remove the option described in addEgressNode from the logical + // switch port, since this node will not be used for egress IP assignments + // from now on. + portName := types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name + lsp := nbdb.LogicalSwitchPort{ + Name: portName, + Options: map[string]string{"nat-addresses": "", "exclude-lb-vips-from-garp": ""}, + } - } else if isNewReady && isNewReachable { - klog.Infof("Node: %s is ready and reachable, adding it for egress assignment", nodeName) - oc.setNodeEgressReachable(nodeName, isNewReachable) - if err := oc.addEgressNode(nodeName); err != nil { - return err + err := libovsdbops.UpdateLogicalSwitchPortSetOptions(oc.nbClient, &lsp) + if errors.Is(err, libovsdbclient.ErrNotFound) { + // if the LSP setup is already gone, then don't count it as an error.
+ klog.Warningf("Unable to remove GARP configuration on external logical switch port for egress node: %s, err: %v", node.Name, err) + } else if err != nil { + return fmt.Errorf("unable to remove GARP configuration on external logical switch port for egress node: %s, err: %v", node.Name, err) } } - - return nil -} - -// deleteEgressIPAllocator removes the node from the allocator cache. -func (oc *DefaultNetworkController) deleteEgressIPAllocator(node *v1.Node) error { - oc.eIPC.allocator.Lock() - if eNode, exists := oc.eIPC.allocator.cache[node.Name]; exists { - eNode.healthClient.Disconnect() - } - delete(oc.eIPC.allocator.cache, node.Name) - oc.eIPC.allocator.Unlock() return nil } @@ -2163,8 +1286,6 @@ func (oc *DefaultNetworkController) initClusterEgressPolicies(nodes []interface{ return err } } - - go oc.checkEgressNodesReachability() return nil } @@ -2203,27 +1324,6 @@ func InitClusterEgressPolicies(nbClient libovsdbclient.Client, addressSetFactory return nil } -// egressNode is a cache helper used for egress IP assignment, representing an egress node -type egressNode struct { - egressIPConfig *util.ParsedNodeEgressIPConfiguration - mgmtIPs []net.IP - allocations map[string]string - healthClient healthcheck.EgressIPHealthClient - isReady bool - isReachable bool - isEgressAssignable bool - name string -} - -func (e *egressNode) getAllocationCountForEgressIP(name string) (count int) { - for _, egressIPName := range e.allocations { - if egressIPName == name { - count++ - } - } - return -} - // podAssignmentState keeps track of which egressIP object is serving // the related pod. // NOTE: At a given time only one object will be configured. This is @@ -2250,19 +1350,7 @@ func (pas *podAssignmentState) Clone() *podAssignmentState { return clone } -type allocator struct { - *sync.Mutex - // A cache used for egress IP assignments containing data for all cluster nodes - // used for egress IP assignments - cache map[string]*egressNode -} - -type egressIPController struct { - // egressIPAssignmentMutex is used to ensure a safe updates between - // concurrent go-routines which could be modifying the egress IP status - // assignment simultaneously. Currently WatchEgressNodes and WatchEgressIP - // run two separate go-routines which do this. - egressIPAssignmentMutex *sync.Mutex +type egressIPZoneController struct { // podAssignmentMutex is used to ensure safe access to podAssignment. // Currently WatchEgressIP, WatchEgressNamespace and WatchEgressPod could // all access that map simultaneously, hence why this guard is needed. @@ -2273,33 +1361,13 @@ type egressIPController struct { // podAssignment is a cache used for keeping track of which egressIP status // has been setup for each pod. The key is defined by getPodKey podAssignment map[string]*podAssignmentState - // pendingCloudPrivateIPConfigsMutex is used to ensure synchronized access - // to pendingCloudPrivateIPConfigsOps which is accessed by the egress IP and - // cloudPrivateIPConfig go-routines - pendingCloudPrivateIPConfigsMutex *sync.Mutex - // pendingCloudPrivateIPConfigsOps is a cache of pending - // CloudPrivateIPConfig changes that we are waiting on an answer for. Items - // in this map are only ever removed once the op is fully finished and we've - // been notified of this. That means: - // - On add operations we only delete once we've seen that the - // CloudPrivateIPConfig is fully added. - // - On delete: when it's fully deleted. - // - On update: once we finish processing the add - which comes after the - // delete. 
- pendingCloudPrivateIPConfigsOps map[string]map[string]*cloudPrivateIPConfigOp - // allocator is a cache of egress IP centric data needed to when both route - // health-checking and tracking allocations made - allocator allocator // libovsdb northbound client interface nbClient libovsdbclient.Client // watchFactory watching k8s objects watchFactory *factory.WatchFactory - // EgressIP Node reachability total timeout configuration - egressIPTotalTimeout int - // reachability check interval - reachabilityCheckInterval time.Duration - // EgressIP Node reachability gRPC port (0 means it should use dial instead) - egressIPNodeHealthCheckPort int + // A cache that maintains all nodes in the cluster, + // value will be true if local to this zone and false otherwise + nodeZoneState *syncmap.SyncMap[bool] } // addStandByEgressIPAssignment does the same setup that is done by addPodEgressIPAssignments but for @@ -2347,7 +1415,8 @@ func (oc *DefaultNetworkController) addStandByEgressIPAssignment(podKey string, // addPodEgressIPAssignment will program OVN with logical router policies // (routing pod traffic to the egress node) and NAT objects on the egress node // (SNAT-ing to the egress IP). -func (e *egressIPController) addPodEgressIPAssignment(egressIPName string, status egressipv1.EgressIPStatusItem, pod *kapi.Pod, podIPs []*net.IPNet) (err error) { +// This function should be called with lock on nodeZoneState cache key status.Node and pod.Spec.NodeName +func (e *egressIPZoneController) addPodEgressIPAssignment(egressIPName string, status egressipv1.EgressIPStatusItem, pod *kapi.Pod, podIPs []*net.IPNet) (err error) { if config.Metrics.EnableScaleMetrics { start := time.Now() defer func() { @@ -2358,29 +1427,42 @@ func (e *egressIPController) addPodEgressIPAssignment(egressIPName string, statu metrics.RecordEgressIPAssign(duration) }() } - - ops, err := createNATRuleOps(e.nbClient, nil, podIPs, status, egressIPName) - if err != nil { - return fmt.Errorf("unable to create NAT rule ops for status: %v, err: %v", status, err) - } - - ops, err = e.createReroutePolicyOps(ops, podIPs, status, egressIPName) - if err != nil { - return fmt.Errorf("unable to create logical router policy ops, err: %v", err) + isLocalZoneEgressNode, loadedEgressNode := e.nodeZoneState.Load(status.Node) + isLocalZonePod, loadedPodNode := e.nodeZoneState.Load(pod.Spec.NodeName) + var ops []ovsdb.Operation + if loadedEgressNode && isLocalZoneEgressNode { + ops, err = createNATRuleOps(e.nbClient, nil, podIPs, status, egressIPName) + if err != nil { + return fmt.Errorf("unable to create NAT rule ops for status: %v, err: %v", status, err) + } + if config.OVNKubernetesFeature.EnableInterconnect && (loadedPodNode && !isLocalZonePod) { + // configure reroute for non-local-zone pods on egress nodes + ops, err = e.createStaticRouteOps(ops, podIPs, status, egressIPName) + if err != nil { + return fmt.Errorf("unable to create logical router static route ops %v, err: %v", status, err) + } + } } - ops, err = e.deleteExternalGWPodSNATOps(ops, pod, podIPs, status) - if err != nil { - return err + // don't add a reroute policy if the egress node towards which we are adding this doesn't exist + if loadedEgressNode && loadedPodNode && isLocalZonePod { + ops, err = e.createReroutePolicyOps(ops, podIPs, status, egressIPName) + if err != nil { + return fmt.Errorf("unable to create logical router policy ops, err: %v", err) + } + ops, err = e.deleteExternalGWPodSNATOps(ops, pod, podIPs, status) + if err != nil { + return err + } } - _, err = 
libovsdbops.TransactAndCheck(e.nbClient, ops) return err } // deletePodEgressIPAssignment deletes the OVN programmed egress IP // configuration mentioned for addPodEgressIPAssignment. -func (e *egressIPController) deletePodEgressIPAssignment(egressIPName string, status egressipv1.EgressIPStatusItem, pod *kapi.Pod, podIPs []*net.IPNet) (err error) { +// This function should be called with lock on nodeZoneState cache key status.Node and pod.Spec.NodeName +func (e *egressIPZoneController) deletePodEgressIPAssignment(egressIPName string, status egressipv1.EgressIPStatusItem, pod *kapi.Pod, podIPs []*net.IPNet) (err error) { if config.Metrics.EnableScaleMetrics { start := time.Now() defer func() { @@ -2392,22 +1474,34 @@ func (e *egressIPController) deletePodEgressIPAssignment(egressIPName string, st }() } - ops, err := e.addExternalGWPodSNATOps(nil, pod.Namespace, pod.Name, status) - if err != nil { - return err - } - - ops, err = e.deleteReroutePolicyOps(ops, podIPs, status, egressIPName) - if errors.Is(err, libovsdbclient.ErrNotFound) { - // if the gateway router join IP setup is already gone, then don't count it as error. - klog.Warningf("Unable to delete logical router policy, err: %v", err) - } else if err != nil { - return fmt.Errorf("unable to delete logical router policy, err: %v", err) + isLocalZonePod, loadedPodNode := e.nodeZoneState.Load(pod.Spec.NodeName) + var ops []ovsdb.Operation + if !loadedPodNode || isLocalZonePod { // node is deleted (we can't determine zone so we always try and nuke OR pod is local to zone) + ops, err = e.addExternalGWPodSNATOps(nil, pod.Namespace, pod.Name, status) + if err != nil { + return err + } + ops, err = e.deleteReroutePolicyOps(ops, podIPs, status, egressIPName) + if errors.Is(err, libovsdbclient.ErrNotFound) { + // if the gateway router join IP setup is already gone, then don't count it as error. + klog.Warningf("Unable to delete logical router policy, err: %v", err) + } else if err != nil { + return fmt.Errorf("unable to delete logical router policy, err: %v", err) + } } - - ops, err = deleteNATRuleOps(e.nbClient, ops, podIPs, status, egressIPName) - if err != nil { - return fmt.Errorf("unable to delete NAT rule for status: %v, err: %v", status, err) + isLocalZoneEgressNode, loadedEgressNode := e.nodeZoneState.Load(status.Node) + if loadedEgressNode && isLocalZoneEgressNode { + if config.OVNKubernetesFeature.EnableInterconnect && (!loadedPodNode || !isLocalZonePod) { // node is deleted (we can't determine zone so we always try and nuke OR pod is remote to zone) + // delete reroute for non-local-zone pods on egress nodes + ops, err = e.deleteStaticRouteOps(ops, podIPs, status, egressIPName) + if err != nil { + return fmt.Errorf("unable to delete logical router static route ops %v, err: %v", status, err) + } + } + ops, err = deleteNATRuleOps(e.nbClient, ops, podIPs, status, egressIPName) + if err != nil { + return fmt.Errorf("unable to delete NAT rule for status: %v, err: %v", status, err) + } } _, err = libovsdbops.TransactAndCheck(e.nbClient, ops) return err @@ -2424,7 +1518,7 @@ func (e *egressIPController) deletePodEgressIPAssignment(egressIPName string, st // check the informer cache since on pod deletion the event handlers are // triggered after the update to the informer cache. We should not re-add the // external GW setup in those cases. 
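The zone gating in addPodEgressIPAssignment and deletePodEgressIPAssignment above reduces to a small decision table. The sketch below restates it with an invented helper name, for illustration only; it deliberately ignores the "node missing from the cache" fallbacks the real code also handles:

	// opsForAssignment summarizes which OVN objects one zone's controller
	// programs for a given (egress node, pod) placement.
	func opsForAssignment(egressNodeLocal, podLocal, interconnect bool) []string {
		var ops []string
		if egressNodeLocal {
			// The SNAT to the egress IP lives on the egress node's gateway router,
			// so only the egress node's zone programs it.
			ops = append(ops, "SNAT: pod IP -> egress IP on the egress node's GR")
			if interconnect && !podLocal {
				// Remote pod, local egress node: route the pod IP, arriving over
				// the transit switch, towards this node's gateway router.
				ops = append(ops, "static route: pod IP via GR join IP")
			}
		}
		if podLocal {
			// The pod's zone steers its traffic towards the egress node: the next
			// hop is the GR join IP if the egress node is local, its transit
			// switch IP otherwise.
			ops = append(ops, "reroute policy: pod IP -> next hop")
		}
		return ops
	}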
-func (e *egressIPController) addExternalGWPodSNAT(podNamespace, podName string, status egressipv1.EgressIPStatusItem) error { +func (e *egressIPZoneController) addExternalGWPodSNAT(podNamespace, podName string, status egressipv1.EgressIPStatusItem) error { ops, err := e.addExternalGWPodSNATOps(nil, podNamespace, podName, status) if err != nil { return fmt.Errorf("error creating ops for adding external gw pod snat: %+v", err) @@ -2447,9 +1541,15 @@ func (e *egressIPController) addExternalGWPodSNAT(podNamespace, podName string, // check the informer cache since on pod deletion the event handlers are // triggered after the update to the informer cache. We should not re-add the // external GW setup in those cases. -func (e *egressIPController) addExternalGWPodSNATOps(ops []ovsdb.Operation, podNamespace, podName string, status egressipv1.EgressIPStatusItem) ([]ovsdb.Operation, error) { +// This function should be called with lock on nodeZoneState cache key pod.Spec.Name +func (e *egressIPZoneController) addExternalGWPodSNATOps(ops []ovsdb.Operation, podNamespace, podName string, status egressipv1.EgressIPStatusItem) ([]ovsdb.Operation, error) { if config.Gateway.DisableSNATMultipleGWs { - if pod, err := e.watchFactory.GetPod(podNamespace, podName); err == nil && pod.Spec.NodeName == status.Node && util.PodNeedsSNAT(pod) { + pod, err := e.watchFactory.GetPod(podNamespace, podName) + if err != nil { + return nil, nil // nothing to do. + } + isLocalZonePod, loadedPodNode := e.nodeZoneState.Load(pod.Spec.NodeName) + if pod.Spec.NodeName == status.Node && loadedPodNode && isLocalZonePod && util.PodNeedsSNAT(pod) { // if the pod still exists, add snats to->nodeIP (on the node where the pod exists) for these podIPs after deleting the snat to->egressIP // NOTE: This needs to be done only if the pod was on the same node as egressNode extIPs, err := getExternalIPsGR(e.watchFactory, pod.Spec.NodeName) @@ -2471,7 +1571,7 @@ func (e *egressIPController) addExternalGWPodSNATOps(ops []ovsdb.Operation, podN } // deleteExternalGWPodSNATOps creates ops for the required external GW teardown for the given pod -func (e *egressIPController) deleteExternalGWPodSNATOps(ops []ovsdb.Operation, pod *kapi.Pod, podIPs []*net.IPNet, status egressipv1.EgressIPStatusItem) ([]ovsdb.Operation, error) { +func (e *egressIPZoneController) deleteExternalGWPodSNATOps(ops []ovsdb.Operation, pod *kapi.Pod, podIPs []*net.IPNet, status egressipv1.EgressIPStatusItem) ([]ovsdb.Operation, error) { if config.Gateway.DisableSNATMultipleGWs && status.Node == pod.Spec.NodeName { // remove snats to->nodeIP (from the node where pod exists if that node is also serving // as an egress node for this pod) for these podIPs before adding the snat to->egressIP @@ -2490,13 +1590,13 @@ func (e *egressIPController) deleteExternalGWPodSNATOps(ops []ovsdb.Operation, p return ops, nil } -func (e *egressIPController) getGatewayRouterJoinIP(node string, wantsIPv6 bool) (net.IP, error) { +func (e *egressIPZoneController) getGatewayRouterJoinIP(node string, wantsIPv6 bool) (net.IP, error) { gatewayIPs, err := util.GetLRPAddrs(e.nbClient, types.GWRouterToJoinSwitchPrefix+types.GWRouterPrefix+node) if err != nil { return nil, fmt.Errorf("attempt at finding node gateway router network information failed, err: %w", err) } if gatewayIP, err := util.MatchFirstIPNetFamily(wantsIPv6, gatewayIPs); err != nil { - return nil, fmt.Errorf("could not find node %s gateway router: %v", node, err) + return nil, fmt.Errorf("could not find gateway IP for node %s with family %v: 
%v", node, wantsIPv6, err) } else { return gatewayIP.IP, nil } @@ -2510,6 +1610,23 @@ func ipFamilyName(isIPv6 bool) string { return "ip4" } +func (e *egressIPZoneController) getTransitIP(nodeName string, wantsIPv6 bool) (string, error) { + // fetch node annotation of the egress node + node, err := e.watchFactory.GetNode(nodeName) + if err != nil { + return "", fmt.Errorf("failed to get node %s: %w", nodeName, err) + } + nodeTransitIPs, err := util.ParseNodeTransitSwitchPortAddrs(node) + if err != nil { + return "", fmt.Errorf("unable to fetch transit switch IP for node %s: %w", nodeName, err) + } + nodeTransitIP, err := util.MatchFirstIPNetFamily(wantsIPv6, nodeTransitIPs) + if err != nil { + return "", fmt.Errorf("could not find transit switch IP of node %v for this family %v: %v", node, wantsIPv6, err) + } + return nodeTransitIP.IP.String(), nil +} + // createReroutePolicyOps creates an operation that does idempotent updates of the // LogicalRouterPolicy corresponding to the egressIP status item, according to the // following update procedure: @@ -2518,19 +1635,37 @@ func ipFamilyName(isIPv6 bool) string { // to equal [gatewayRouterIP] // - if the LogicalRouterPolicy does exist: it adds the gatewayRouterIP to the // array of nexthops -func (e *egressIPController) createReroutePolicyOps(ops []ovsdb.Operation, podIPNets []*net.IPNet, status egressipv1.EgressIPStatusItem, egressIPName string) ([]ovsdb.Operation, error) { +// This function should be called with lock on nodeZoneState cache key status.Node +func (e *egressIPZoneController) createReroutePolicyOps(ops []ovsdb.Operation, podIPNets []*net.IPNet, status egressipv1.EgressIPStatusItem, egressIPName string) ([]ovsdb.Operation, error) { isEgressIPv6 := utilnet.IsIPv6String(status.EgressIP) - gatewayRouterIP, err := e.getGatewayRouterJoinIP(status.Node, isEgressIPv6) - if err != nil { - return nil, fmt.Errorf("unable to retrieve gateway IP for node: %s, protocol is IPv6: %v, err: %w", status.Node, isEgressIPv6, err) + var nextHopIP string + var err error + // NOTE: No need to check if status.node exists or not in the cache, we are calling this function only if it + // is present in the nodeZoneState cache. Since we call it with lock on cache, we are safe here. 
+ isLocalZoneEgressNode, _ := e.nodeZoneState.Load(status.Node) + if isLocalZoneEgressNode { + gatewayRouterIP, err := e.getGatewayRouterJoinIP(status.Node, isEgressIPv6) + if err != nil { + return nil, fmt.Errorf("unable to retrieve gateway IP for node: %s, protocol is IPv6: %v, err: %w", status.Node, isEgressIPv6, err) + } + nextHopIP = gatewayRouterIP.String() + } else if config.OVNKubernetesFeature.EnableInterconnect { + // fetch node annotation of the egress node + nextHopIP, err = e.getTransitIP(status.Node, isEgressIPv6) + if err != nil { + return nil, fmt.Errorf("unable to fetch transit switch IP for node %s: %v", status.Node, err) + } + } + // if neither of the above conditions is met, return an error since we don't want to add an empty nextHop LRP + if nextHopIP == "" { + return nil, fmt.Errorf("unable to determine nextHop for egressIP %s with status %v", egressIPName, status) } - // Handle all pod IPs that match the egress IP address family for _, podIPNet := range util.MatchAllIPNetFamily(isEgressIPv6, podIPNets) { lrp := nbdb.LogicalRouterPolicy{ Match: fmt.Sprintf("%s.src == %s", ipFamilyName(isEgressIPv6), podIPNet.IP.String()), Priority: types.EgressIPReroutePriority, - Nexthops: []string{gatewayRouterIP.String()}, + Nexthops: []string{nextHopIP}, Action: nbdb.LogicalRouterPolicyActionReroute, ExternalIDs: map[string]string{ "name": egressIPName, @@ -2555,23 +1690,89 @@ func (e *egressIPController) createReroutePolicyOps(ops []ovsdb.Operation, podIP // the specified gatewayRouterIP from nexthops // - if the LogicalRouterPolicy exist and has the len(nexthops) == 1: it removes // the LogicalRouterPolicy completely -func (e *egressIPController) deleteReroutePolicyOps(ops []ovsdb.Operation, podIPNets []*net.IPNet, status egressipv1.EgressIPStatusItem, egressIPName string) ([]ovsdb.Operation, error) { +// This function should be called with lock on nodeZoneState cache key status.Node +func (e *egressIPZoneController) deleteReroutePolicyOps(ops []ovsdb.Operation, podIPNets []*net.IPNet, status egressipv1.EgressIPStatusItem, egressIPName string) ([]ovsdb.Operation, error) { isEgressIPv6 := utilnet.IsIPv6String(status.EgressIP) - gatewayRouterIP, err := e.getGatewayRouterJoinIP(status.Node, isEgressIPv6) - if err != nil { - return nil, fmt.Errorf("unable to retrieve gateway IP for node: %s, protocol is IPv6: %v, err: %w", status.Node, isEgressIPv6, err) + var nextHopIP string + var err error + isLocalZoneEgressNode, loadedEgressNode := e.nodeZoneState.Load(status.Node) + if loadedEgressNode && isLocalZoneEgressNode { + gatewayRouterIP, err := e.getGatewayRouterJoinIP(status.Node, isEgressIPv6) + if err != nil { + return nil, fmt.Errorf("unable to retrieve gateway IP for node: %s, protocol is IPv6: %v, err: %w", status.Node, isEgressIPv6, err) + } + nextHopIP = gatewayRouterIP.String() + } else if config.OVNKubernetesFeature.EnableInterconnect { + // fetch node annotation of the egress node + nextHopIP, err = e.getTransitIP(status.Node, isEgressIPv6) + if apierrors.IsNotFound(err) { + // if the node is already gone, then don't count it as an error during deletion.
+ klog.Warningf("Unable to fetch transit switch IP for node: %s: err: %v", status.Node, err) + } else if err != nil { + return nil, fmt.Errorf("unable to fetch transit switch IP for node %s: %v", status.Node, err) + } + } + // if neither of the above conditions is met, return an error since we can't remove an empty nextHop LRP + if nextHopIP == "" { + return nil, fmt.Errorf("unable to determine nextHop for egressIP %s with status %v", egressIPName, status) } - // Handle all pod IPs that match the egress IP address family for _, podIPNet := range util.MatchAllIPNetFamily(isEgressIPv6, podIPNets) { filterOption := fmt.Sprintf("%s.src == %s", ipFamilyName(isEgressIPv6), podIPNet.IP.String()) p := func(item *nbdb.LogicalRouterPolicy) bool { return item.Match == filterOption && item.Priority == types.EgressIPReroutePriority && item.ExternalIDs["name"] == egressIPName } - ops, err = libovsdbops.DeleteNextHopFromLogicalRouterPoliciesWithPredicateOps(e.nbClient, ops, types.OVNClusterRouter, p, gatewayRouterIP.String()) + ops, err = libovsdbops.DeleteNextHopFromLogicalRouterPoliciesWithPredicateOps(e.nbClient, ops, types.OVNClusterRouter, p, nextHopIP) if err != nil { return nil, fmt.Errorf("error removing nexthop IP %s from egress ip %s policies on router %s: %v", - gatewayRouterIP, egressIPName, types.OVNClusterRouter, err) + nextHopIP, egressIPName, types.OVNClusterRouter, err) + } + } + return ops, nil +} + +func (e *egressIPZoneController) createStaticRouteOps(ops []ovsdb.Operation, podIPNets []*net.IPNet, status egressipv1.EgressIPStatusItem, egressIPName string) ([]ovsdb.Operation, error) { + isEgressIPv6 := utilnet.IsIPv6String(status.EgressIP) + gatewayRouterIP, err := e.getGatewayRouterJoinIP(status.Node, isEgressIPv6) + if err != nil { + return nil, fmt.Errorf("unable to retrieve gateway IP for node: %s, protocol is IPv6: %v, err: %w", status.Node, isEgressIPv6, err) + } + // Handle all pod IPs that match the egress IP address family + for _, podIPNet := range util.MatchAllIPNetFamily(isEgressIPv6, podIPNets) { + lrsr := nbdb.LogicalRouterStaticRoute{ + IPPrefix: podIPNet.IP.String(), + Nexthop: gatewayRouterIP.String(), + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + } + p := func(item *nbdb.LogicalRouterStaticRoute) bool { + return item.IPPrefix == lrsr.IPPrefix && item.Nexthop == lrsr.Nexthop && item.ExternalIDs["name"] == lrsr.ExternalIDs["name"] && item.Policy != nil && *item.Policy == *lrsr.Policy + } + + ops, err = libovsdbops.CreateOrUpdateLogicalRouterStaticRoutesWithPredicateOps(e.nbClient, ops, types.OVNClusterRouter, &lrsr, p) + if err != nil { + return nil, fmt.Errorf("error creating logical router static route %+v on router %s: %v", lrsr, types.OVNClusterRouter, err) + } + } + return ops, nil +} + +func (e *egressIPZoneController) deleteStaticRouteOps(ops []ovsdb.Operation, podIPNets []*net.IPNet, status egressipv1.EgressIPStatusItem, egressIPName string) ([]ovsdb.Operation, error) { + isEgressIPv6 := utilnet.IsIPv6String(status.EgressIP) + gatewayRouterIP, err := e.getGatewayRouterJoinIP(status.Node, isEgressIPv6) + if err != nil { + return nil, fmt.Errorf("unable to retrieve gateway IP for node: %s, protocol is IPv6: %v, err: %w", status.Node, isEgressIPv6, err) + } + // Handle all pod IPs that match the egress IP address family + for _, podIPNet := range util.MatchAllIPNetFamily(isEgressIPv6, podIPNets) { + p := func(item *nbdb.LogicalRouterStaticRoute) bool { + return item.IPPrefix == podIPNet.IP.String() && item.Nexthop ==
gatewayRouterIP.String() && item.ExternalIDs["name"] == egressIPName && item.Policy != nil && *item.Policy == nbdb.LogicalRouterStaticRoutePolicySrcIP + } + ops, err = libovsdbops.DeleteLogicalRouterStaticRoutesWithPredicateOps(e.nbClient, ops, types.OVNClusterRouter, p) + if err != nil { + return nil, fmt.Errorf("error deleting logical router static route on router %s: %v", types.OVNClusterRouter, err) } } return ops, nil } @@ -2585,52 +1786,82 @@ func (e *egressIPController) deleteReroutePolicyOps(ops []ovsdb.Operation, podIP // gatewayRouterIP corresponding to the node in the EgressIPStatusItem, else // just remove the gatewayRouterIP from the list of nexthops // It also returns the list of podIPs whose routes and SNAT's were deleted -func (e *egressIPController) deleteEgressIPStatusSetup(name string, status egressipv1.EgressIPStatusItem) ([]net.IP, error) { +// This function should be called with a lock on the nodeZoneState cache key status.Node +func (e *egressIPZoneController) deleteEgressIPStatusSetup(name string, status egressipv1.EgressIPStatusItem) ([]net.IP, error) { isEgressIPv6 := utilnet.IsIPv6String(status.EgressIP) - gatewayRouterIP, err := e.getGatewayRouterJoinIP(status.Node, isEgressIPv6) - if errors.Is(err, libovsdbclient.ErrNotFound) { - // if the gateway router join IP setup is already gone, then don't count it as error. - klog.Warningf("Unable to retrieve gateway IP for node: %s, protocol is IPv6: %v, err: %v", status.Node, isEgressIPv6, err) - } else if err != nil { - return nil, fmt.Errorf("unable to retrieve gateway IP for node: %s, protocol is IPv6: %v, err: %v", status.Node, isEgressIPv6, err) + var nextHopIP string + var err error + isLocalZoneEgressNode, loadedEgressNode := e.nodeZoneState.Load(status.Node) + if loadedEgressNode && isLocalZoneEgressNode { + gatewayRouterIP, err := e.getGatewayRouterJoinIP(status.Node, isEgressIPv6) + if errors.Is(err, libovsdbclient.ErrNotFound) { + // if the gateway router join IP setup is already gone, then don't count it as error. + klog.Warningf("Unable to retrieve gateway IP for node: %s, protocol is IPv6: %v, err: %v", status.Node, isEgressIPv6, err) + } else if err != nil { + return nil, fmt.Errorf("unable to retrieve gateway IP for node: %s, protocol is IPv6: %v, err: %v", status.Node, isEgressIPv6, err) + } + if gatewayRouterIP != nil { + nextHopIP = gatewayRouterIP.String() + } + } else if config.OVNKubernetesFeature.EnableInterconnect { + // fetch node annotation of the egress node + nextHopIP, err = e.getTransitIP(status.Node, isEgressIPv6) + if apierrors.IsNotFound(err) { + // if the node is already gone, then don't count it as an error during deletion.
+ // technically speaking, the egressNode handler should always get the delete event before the node handler + klog.Warningf("Unable to fetch transit switch IP for node: %s: err: %v", status.Node, err) + } else if err != nil { + return nil, fmt.Errorf("unable to fetch transit switch IP for node %s: %w", status.Node, err) + } } var ops []ovsdb.Operation - if gatewayRouterIP != nil { - gwIP := gatewayRouterIP.String() + if nextHopIP != "" { policyPred := func(item *nbdb.LogicalRouterPolicy) bool { hasGatewayRouterIPNexthop := false for _, nexthop := range item.Nexthops { - if nexthop == gwIP { + if nexthop == nextHopIP { hasGatewayRouterIPNexthop = true break } } return item.Priority == types.EgressIPReroutePriority && item.ExternalIDs["name"] == name && hasGatewayRouterIPNexthop } - ops, err = libovsdbops.DeleteNextHopFromLogicalRouterPoliciesWithPredicateOps(e.nbClient, nil, types.OVNClusterRouter, policyPred, gwIP) + ops, err = libovsdbops.DeleteNextHopFromLogicalRouterPoliciesWithPredicateOps(e.nbClient, ops, types.OVNClusterRouter, policyPred, nextHopIP) if err != nil { return nil, fmt.Errorf("error removing nexthop IP %s from egress ip %s policies on router %s: %v", - gatewayRouterIP, name, types.OVNClusterRouter, err) + nextHopIP, name, types.OVNClusterRouter, err) } } - routerName := util.GetGatewayRouterFromNode(status.Node) - natPred := func(nat *nbdb.NAT) bool { - return nat.ExternalIDs["name"] == name && nat.ExternalIP == status.EgressIP - } - nats, err := libovsdbops.FindNATsWithPredicate(e.nbClient, natPred) // save the nats to get the podIPs before that nats get deleted - if err != nil { - return nil, fmt.Errorf("error removing egress ip pods from adress set %s: %v", EgressIPServedPodsAddrSetName, err) - } - ops, err = libovsdbops.DeleteNATsWithPredicateOps(e.nbClient, ops, natPred) - if err != nil { - return nil, fmt.Errorf("error removing egress ip %s nats on router %s: %v", name, routerName, err) + var nats []*nbdb.NAT + if loadedEgressNode && isLocalZoneEgressNode { + if config.OVNKubernetesFeature.EnableInterconnect && nextHopIP != "" { + p := func(item *nbdb.LogicalRouterStaticRoute) bool { + return item.Nexthop == nextHopIP && item.ExternalIDs["name"] == name && item.Policy != nil && *item.Policy == nbdb.LogicalRouterStaticRoutePolicySrcIP + } + ops, err = libovsdbops.DeleteLogicalRouterStaticRoutesWithPredicateOps(e.nbClient, ops, types.OVNClusterRouter, p) + if err != nil { + return nil, fmt.Errorf("error deleting logical router static routes on router %s for %s: %w", types.OVNClusterRouter, name, err) + } + } + routerName := util.GetGatewayRouterFromNode(status.Node) + natPred := func(nat *nbdb.NAT) bool { + // We should delete NATs only from the status.Node that was passed into this function + return nat.ExternalIDs["name"] == name && nat.ExternalIP == status.EgressIP && nat.LogicalPort != nil && *nat.LogicalPort == types.K8sPrefix+status.Node + } + nats, err = libovsdbops.FindNATsWithPredicate(e.nbClient, natPred) // save the NATs to get the podIPs before the NATs get deleted + if err != nil { + return nil, fmt.Errorf("error removing egress ip pods from address set %s: %v", EgressIPServedPodsAddrSetName, err) + } + ops, err = libovsdbops.DeleteNATsWithPredicateOps(e.nbClient, ops, natPred) + if err != nil { + return nil, fmt.Errorf("error removing egress ip %s nats on router %s: %v", name, routerName, err) + } } - _, err = libovsdbops.TransactAndCheck(e.nbClient, ops) if err != nil { - return nil, fmt.Errorf("error trasnsacting ops %+v: %v", ops, err) + return
nil, fmt.Errorf("error transacting ops %+v: %v", ops, err) } var podIPs []net.IP for i := range nats { @@ -2666,157 +1897,6 @@ func (oc *DefaultNetworkController) deletePodIPsFromAddressSet(addrSetIPs []net. return nil } -// checkEgressNodesReachability continuously checks if all nodes used for egress -// IP assignment are reachable, and updates the nodes following the result. This -// is important because egress IP is based upon routing traffic to these nodes, -// and if they aren't reachable we shouldn't be using them for egress IP. -func (oc *DefaultNetworkController) checkEgressNodesReachability() { - timer := time.NewTicker(oc.eIPC.reachabilityCheckInterval) - defer timer.Stop() - for { - select { - case <-timer.C: - checkEgressNodesReachabilityIterate(oc) - case <-oc.stopChan: - klog.V(5).Infof("Stop channel got triggered: will stop checkEgressNodesReachability") - return - } - } -} - -func checkEgressNodesReachabilityIterate(oc *DefaultNetworkController) { - reAddOrDelete := map[string]bool{} - oc.eIPC.allocator.Lock() - for _, eNode := range oc.eIPC.allocator.cache { - if eNode.isEgressAssignable && eNode.isReady { - wasReachable := eNode.isReachable - isReachable := oc.isReachable(eNode.name, eNode.mgmtIPs, eNode.healthClient) - if wasReachable && !isReachable { - reAddOrDelete[eNode.name] = true - } else if !wasReachable && isReachable { - reAddOrDelete[eNode.name] = false - } - eNode.isReachable = isReachable - } else { - // End connection (if there is one). This is important because - // it accounts for cases where node is not labelled with - // egress-assignable, so connection is no longer needed. Calling - // this on a already disconnected node is expected to be cheap. - eNode.healthClient.Disconnect() - } - } - oc.eIPC.allocator.Unlock() - for nodeName, shouldDelete := range reAddOrDelete { - if shouldDelete { - metrics.RecordEgressIPUnreachableNode() - klog.Warningf("Node: %s is detected as unreachable, deleting it from egress assignment", nodeName) - if err := oc.deleteEgressNode(nodeName); err != nil { - klog.Errorf("Node: %s is detected as unreachable, but could not re-assign egress IPs, err: %v", nodeName, err) - } - } else { - klog.Infof("Node: %s is detected as reachable and ready again, adding it to egress assignment", nodeName) - if err := oc.addEgressNode(nodeName); err != nil { - klog.Errorf("Node: %s is detected as reachable and ready again, but could not re-assign egress IPs, err: %v", nodeName, err) - } - } - } -} - -func (oc *DefaultNetworkController) isReachable(nodeName string, mgmtIPs []net.IP, healthClient healthcheck.EgressIPHealthClient) bool { - // Check if we need to do node reachability check - if oc.eIPC.egressIPTotalTimeout == 0 { - return true - } - - if oc.eIPC.egressIPNodeHealthCheckPort == 0 { - return isReachableLegacy(nodeName, mgmtIPs, oc.eIPC.egressIPTotalTimeout) - } - return isReachableViaGRPC(mgmtIPs, healthClient, oc.eIPC.egressIPNodeHealthCheckPort, oc.eIPC.egressIPTotalTimeout) -} - -func isReachableLegacy(node string, mgmtIPs []net.IP, totalTimeout int) bool { - var retryTimeOut, initialRetryTimeOut time.Duration - - numMgmtIPs := len(mgmtIPs) - if numMgmtIPs == 0 { - return false - } - - switch totalTimeout { - // Check if we need to do node reachability check - case 0: - return true - case 1: - // Using time duration for initial retry with 700/numIPs msec and retry of 100/numIPs msec - // to ensure total wait time will be in range with the configured value including a sleep of 100msec between attempts. 
- initialRetryTimeOut = time.Duration(700/numMgmtIPs) * time.Millisecond - retryTimeOut = time.Duration(100/numMgmtIPs) * time.Millisecond - default: - // Using time duration for initial retry with 900/numIPs msec - // to ensure total wait time will be in range with the configured value including a sleep of 100msec between attempts. - initialRetryTimeOut = time.Duration(900/numMgmtIPs) * time.Millisecond - retryTimeOut = initialRetryTimeOut - } - - timeout := initialRetryTimeOut - endTime := time.Now().Add(time.Second * time.Duration(totalTimeout)) - for time.Now().Before(endTime) { - for _, ip := range mgmtIPs { - if dialer.dial(ip, timeout) { - return true - } - } - time.Sleep(100 * time.Millisecond) - timeout = retryTimeOut - } - klog.Errorf("Failed reachability check for %s", node) - return false -} - -type egressIPDial struct{} - -// Blantant copy from: https://github.com/openshift/sdn/blob/master/pkg/network/common/egressip.go#L499-L505 -// Ping a node and return whether or not we think it is online. We do this by trying to -// open a TCP connection to the "discard" service (port 9); if the node is offline, the -// attempt will either time out with no response, or else return "no route to host" (and -// we will return false). If the node is online then we presumably will get a "connection -// refused" error; but the code below assumes that anything other than timeout or "no -// route" indicates that the node is online. -func (e *egressIPDial) dial(ip net.IP, timeout time.Duration) bool { - conn, err := net.DialTimeout("tcp", net.JoinHostPort(ip.String(), "9"), timeout) - if conn != nil { - conn.Close() - } - if opErr, ok := err.(*net.OpError); ok { - if opErr.Timeout() { - return false - } - if sysErr, ok := opErr.Err.(*os.SyscallError); ok && sysErr.Err == syscall.EHOSTUNREACH { - return false - } - } - return true -} - -type egressIPHealthcheckClientAllocator struct{} - -func (hccAlloc *egressIPHealthcheckClientAllocator) allocate(nodeName string) healthcheck.EgressIPHealthClient { - return healthcheck.NewEgressIPHealthClient(nodeName) -} - -func isReachableViaGRPC(mgmtIPs []net.IP, healthClient healthcheck.EgressIPHealthClient, healthCheckPort, totalTimeout int) bool { - dialCtx, dialCancel := context.WithTimeout(context.Background(), time.Duration(totalTimeout)*time.Second) - defer dialCancel() - - if !healthClient.IsConnected() { - // gRPC session is not up. Attempt to connect and if that suceeds, we will declare node as reacheable. - return healthClient.Connect(dialCtx, mgmtIPs, healthCheckPort) - } - - // gRPC session is already established. Send a probe, which will succeed, or close the session. - return healthClient.Probe(dialCtx) -} - func getClusterSubnets() ([]*net.IPNet, []*net.IPNet) { var v4ClusterSubnets = []*net.IPNet{} var v6ClusterSubnets = []*net.IPNet{} @@ -3049,61 +2129,3 @@ func getPodNamespaceAndNameFromKey(podKey string) (string, string) { parts := strings.Split(podKey, "_") return parts[0], parts[1] } - -func getEgressIPAllocationTotalCount(allocator allocator) float64 { - count := 0 - allocator.Lock() - defer allocator.Unlock() - for _, eNode := range allocator.cache { - count += len(eNode.allocations) - } - return float64(count) -} - -// cloudPrivateIPConfigNameToIPString converts the resource name to the string -// representation of net.IP. Given a limitation in the Kubernetes API server -// (see: https://github.com/kubernetes/kubernetes/pull/100950) -// CloudPrivateIPConfig.metadata.name cannot represent an IPv6 address. 
To -// work-around this limitation it was decided that the network plugin creating -// the CR will fully expand the IPv6 address and replace all colons with dots, -// ex: - -// The CloudPrivateIPConfig name fc00.f853.0ccd.e793.0000.0000.0000.0054 will be -// represented as address: fc00:f853:ccd:e793::54 - -// We thus need to replace every fifth character's dot with a colon. -func cloudPrivateIPConfigNameToIPString(name string) string { - // Handle IPv4, which will work fine. - if ip := net.ParseIP(name); ip != nil { - return name - } - // Handle IPv6, for which we want to convert the fully expanded "special - // name" to go's default IP representation - name = strings.ReplaceAll(name, ".", ":") - return net.ParseIP(name).String() -} - -// ipStringToCloudPrivateIPConfigName converts the net.IP string representation -// to a CloudPrivateIPConfig compatible name. - -// The string representation of the IPv6 address fc00:f853:ccd:e793::54 will be -// represented as: fc00.f853.0ccd.e793.0000.0000.0000.0054 - -// We thus need to fully expand the IP string and replace every fifth -// character's colon with a dot. -func ipStringToCloudPrivateIPConfigName(ipString string) (name string) { - ip := net.ParseIP(ipString) - if ip.To4() != nil { - return ipString - } - dst := make([]byte, hex.EncodedLen(len(ip))) - hex.Encode(dst, ip) - for i := 0; i < len(dst); i += 4 { - if len(dst)-i == 4 { - name += string(dst[i : i+4]) - } else { - name += string(dst[i:i+4]) + "." - } - } - return -} diff --git a/go-controller/pkg/ovn/egressip_test.go b/go-controller/pkg/ovn/egressip_test.go index 57a6cb03c2..0406dc2f6d 100644 --- a/go-controller/pkg/ovn/egressip_test.go +++ b/go-controller/pkg/ovn/egressip_test.go @@ -7,13 +7,13 @@ import ( "time" "github.com/onsi/ginkgo" + ginkgotable "github.com/onsi/ginkgo/extensions/table" "github.com/onsi/gomega" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" egressipv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" egresssvc "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/egress_services" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/healthcheck" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/retry" ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" libovsdbtest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/libovsdb" @@ -22,61 +22,13 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" "github.com/urfave/cli/v2" - kapi "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" k8stypes "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/sets" - utilnet "k8s.io/utils/net" utilpointer "k8s.io/utils/pointer" ) -type fakeEgressIPDialer struct{} - -func (f fakeEgressIPDialer) dial(ip net.IP, timeout time.Duration) bool { - return true -} - -type fakeEgressIPHealthClient struct { - Connected bool - ProbeCount int - FakeProbeFailure bool -} - -func (fehc *fakeEgressIPHealthClient) IsConnected() bool { - return fehc.Connected -} - -func (fehc *fakeEgressIPHealthClient) Connect(dialCtx context.Context, mgmtIPs []net.IP, healthCheckPort int) bool { - if fehc.FakeProbeFailure { - return false - } - fehc.Connected = true - return true -} - -func (fehc *fakeEgressIPHealthClient) Disconnect() { - fehc.Connected = false - fehc.ProbeCount = 0 -} - -func (fehc *fakeEgressIPHealthClient) 
Probe(dialCtx context.Context) bool { - if fehc.Connected && !fehc.FakeProbeFailure { - fehc.ProbeCount++ - return true - } - return false -} - -type fakeEgressIPHealthClientAllocator struct{} - -func (f *fakeEgressIPHealthClientAllocator) allocate(nodeName string) healthcheck.EgressIPHealthClient { - return &fakeEgressIPHealthClient{} -} - var ( - reroutePolicyID = "reroute_policy_id" - natID = "nat_id" nodeLogicalRouterIPv6 = []string{"fef0::56"} nodeLogicalRouterIPv4 = []string{"100.64.0.2"} node2LogicalRouterIPv4 = []string{"100.64.0.3"} @@ -114,51 +66,6 @@ func newEgressIPMeta(name string) metav1.ObjectMeta { var egressPodLabel = map[string]string{"egress": "needed"} -func setupNode(nodeName string, ipNets []string, mockAllocationIPs map[string]string) egressNode { - var v4IP, v6IP net.IP - var v4Subnet, v6Subnet *net.IPNet - for _, ipNet := range ipNets { - ip, net, _ := net.ParseCIDR(ipNet) - if utilnet.IsIPv6CIDR(net) { - v6Subnet = net - v6IP = ip - } else { - v4Subnet = net - v4IP = ip - } - } - - mockAllcations := map[string]string{} - for mockAllocationIP, egressIPName := range mockAllocationIPs { - mockAllcations[net.ParseIP(mockAllocationIP).String()] = egressIPName - } - - node := egressNode{ - egressIPConfig: &util.ParsedNodeEgressIPConfiguration{ - V4: util.ParsedIFAddr{ - IP: v4IP, - Net: v4Subnet, - }, - V6: util.ParsedIFAddr{ - IP: v6IP, - Net: v6Subnet, - }, - Capacity: util.Capacity{ - IP: util.UnlimitedNodeCapacity, - IPv4: util.UnlimitedNodeCapacity, - IPv6: util.UnlimitedNodeCapacity, - }, - }, - allocations: mockAllcations, - healthClient: hccAllocator.allocate(nodeName), // using fakeEgressIPHealthClientAllocator - name: nodeName, - isReady: true, - isReachable: true, - isEgressAssignable: true, - } - return node -} - var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { var ( app *cli.App @@ -178,15 +85,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, } - dialer = fakeEgressIPDialer{} - hccAllocator = &fakeEgressIPHealthClientAllocator{} - - getEgressIPAllocatorSizeSafely := func() int { - fakeOvn.controller.eIPC.allocator.Lock() - defer fakeOvn.controller.eIPC.allocator.Unlock() - return len(fakeOvn.controller.eIPC.allocator.cache) - } - getEgressIPStatusLen := func(egressIPName string) func() int { return func() int { tmp, err := fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), egressIPName, metav1.GetOptions{}) @@ -218,17 +116,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { return reAssignmentCount } - isEgressAssignableNode := func(nodeName string) func() bool { - return func() bool { - fakeOvn.controller.eIPC.allocator.Lock() - defer fakeOvn.controller.eIPC.allocator.Unlock() - if item, exists := fakeOvn.controller.eIPC.allocator.cache[nodeName]; exists { - return item.isEgressAssignable - } - return false - } - } - nodeSwitch := func() string { _, nodes := getEgressIPStatus(egressIPName) if len(nodes) != 1 { @@ -254,7 +141,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.shutdown() }) - getPodAssignmentState := func(pod *kapi.Pod) *podAssignmentState { + getPodAssignmentState := func(pod *v1.Pod) *podAssignmentState { fakeOvn.controller.eIPC.podAssignmentMutex.Lock() defer fakeOvn.controller.eIPC.podAssignmentMutex.Unlock() if pas := fakeOvn.controller.eIPC.podAssignment[getPodKey(pod)]; pas != nil { @@ -265,79 +152,295 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { ginkgo.Context("On node UPDATE", func() { - 
ginkgo.It("should re-assign EgressIPs and perform proper OVN transactions when pod is created after node egress label switch", func() { - app.Action = func(ctx *cli.Context) error { + ginkgotable.DescribeTable("should perform proper OVN transactions when pod is created after node egress label switch", + func(interconnect bool) { + app.Action = func(ctx *cli.Context) error { + config.OVNKubernetesFeature.EnableInterconnect = interconnect + egressIP := "192.168.126.101" + node1IPv4 := "192.168.126.202/24" + node2IPv4 := "192.168.126.51/24" + + egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) + egressNamespace := newNamespace(namespace) + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + } + labels := map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + node1 := getNodeObj(node1Name, annotations, labels) + annotations = map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + } + labels = map[string]string{} + node2 := getNodeObj(node2Name, annotations, labels) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } - egressIP := "192.168.126.101" - node1IPv4 := "192.168.126.202/24" - node2IPv4 := "192.168.126.51/24" + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{node2LogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, + }, + }, + }, + &egressipv1.EgressIPList{ + Items: 
[]egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1, node2}, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{*egressNamespace}, + }) - egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) - egressNamespace := newNamespace(namespace) + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + lsp := &nbdb.LogicalSwitchPort{Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name} + fakeOvn.controller.nbClient.Get(context.Background(), lsp) + gomega.Eventually(lsp.Options["nat-addresses"]).Should(gomega.Equal("router")) + gomega.Eventually(lsp.Options["exclude-lb-vips-from-garp"]).Should(gomega.Equal("true")) + + fakeOvn.patchEgressIPObj(node1Name, egressIP) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + node1.Labels = map[string]string{} + node2.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeOvn.patchEgressIPObj(node2Name, egressIP) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + gomega.Eventually(nodeSwitch).Should(gomega.Equal(node2.Name)) + egressIPs, _ = getEgressIPStatus(egressIPName) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + i, n, _ := net.ParseCIDR(podV4IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Create(context.TODO(), &egressPod, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + expectedNatLogicalPort := "k8s-node2" + expectedDatabaseState := []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "default-no-reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: node2LogicalRouterIPv4, + ExternalIDs: map[string]string{ + "name": 
eIP.Name, + }, + UUID: "reroute-UUID", + }, + &nbdb.NAT{ + UUID: "egressip-nat-UUID", + LogicalIP: podV4IP, + ExternalIP: egressIP, + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort, + Options: map[string]string{ + "stateless": "false", + }, }, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Nat: []string{"egressip-nat-UUID"}, }, - }, - } - node2 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node2Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"}, }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{node2LogicalRouterIfAddrV4}, }, - }, - } - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - PodSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, }, - NamespaceSelector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - "name": egressNamespace.Name, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", }, }, - }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{}, - }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + } + + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + return nil } - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + ginkgotable.Entry("interconnect disabled", false), + ginkgotable.Entry("interconnect enabled", true), + ) + + ginkgotable.DescribeTable("using EgressNode retry 
should perform proper OVN transactions when pod is created after node egress label switch", + func(interconnect bool) { + config.OVNKubernetesFeature.EnableInterconnect = interconnect + app.Action = func(ctx *cli.Context) error { + + egressIP := "192.168.126.101" + node1IPv4 := "192.168.126.202/24" + node2IPv4 := "192.168.126.51/24" + node3IPv4 := "192.168.126.0/24" + + egressPod := *newPodWithLabels(namespace, podName, "node3", podV4IP, egressPodLabel) + egressNamespace := newNamespace(namespace) + + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + } + labels := map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + node1 := getNodeObj(node1Name, annotations, labels) + annotations = map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + } + labels = map[string]string{} + node2 := getNodeObj(node2Name, annotations, labels) + annotations = map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node3IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + } + labels = map[string]string{} + node3 := getNodeObj("node3", annotations, labels) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + initialDB := libovsdbtest.TestSetup{ NBData: []libovsdbtest.TestData{ &nbdb.LogicalRouterPort{ UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, + Networks: []string{node2LogicalRouterIfAddrV4}, }, &nbdb.LogicalRouterPort{ UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", @@ -373,561 +476,1191 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, }, }, - }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, - }, - &v1.NodeList{ - Items: []v1.Node{node1, node2}, - }, - &v1.NamespaceList{ - Items: []v1.Namespace{*egressNamespace}, - }) - - err := fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressNodes() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) - gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeFalse()) + } + fakeOvn.startWithDBSetup( + initialDB, + 
&egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1, node2, node3}, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{*egressNamespace}, + }) - lsp := &nbdb.LogicalSwitchPort{Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name} - fakeOvn.controller.nbClient.Get(context.Background(), lsp) - gomega.Eventually(lsp.Options["nat-addresses"]).Should(gomega.Equal("router")) - gomega.Eventually(lsp.Options["exclude-lb-vips-from-garp"]).Should(gomega.Equal("true")) + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + lsp := &nbdb.LogicalSwitchPort{Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name} + err = fakeOvn.controller.nbClient.Get(context.Background(), lsp) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(lsp.Options["nat-addresses"]).Should(gomega.Equal("router")) + gomega.Eventually(lsp.Options["exclude-lb-vips-from-garp"]).Should(gomega.Equal("true")) - node1.Labels = map[string]string{} - node2.Labels = map[string]string{ - "k8s.ovn.org/egress-assignable": "", - } + lsp = &nbdb.LogicalSwitchPort{Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name} + err = fakeOvn.controller.nbClient.Get(context.Background(), lsp) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(lsp.Options["nat-addresses"]).Should(gomega.Equal("router")) + gomega.Eventually(lsp.Options["exclude-lb-vips-from-garp"]).Should(gomega.Equal("true")) - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.patchEgressIPObj(node1Name, egressIP) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - gomega.Eventually(nodeSwitch).Should(gomega.Equal(node2.Name)) - egressIPs, _ = getEgressIPStatus(egressIPName) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) - i, n, _ := net.ParseCIDR(podV4IP + "/23") - n.IP = i - fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Create(context.TODO(), &egressPod, metav1.CreateOptions{}) + node1.Labels = map[string]string{} + node2.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + node2.Annotations["k8s.ovn.org/host-addresses"] = fmt.Sprintf("[\"%s\",\"%s\"]", "192.168.126.51", "") + ginkgo.By("Bringing down NBDB") + // inject transient problem, nbdb is down + 
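// with the NB client closed, the node1 delete and node2 update below fail their OVN transactions, so both events land in the retryEgressNodes retry cache +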
fakeOvn.controller.nbClient.Close() + gomega.Eventually(func() bool { + return fakeOvn.controller.nbClient.Connected() + }).Should(gomega.BeFalse()) + err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Delete(context.TODO(), node1.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // sleep long enough for TransactWithRetry to fail, causing egressnode operations to fail + // there is a chance that both egressnode events(node1 removal and node2 update) will end up in the same event queue + // sleep for double the time to allow for two consecutive TransactWithRetry timeouts + time.Sleep(2 * (types.OVSDBTimeout + time.Second)) + // check to see if the retry cache has an entry + key1 := node1.Name + ginkgo.By("retry entry: old obj should not be nil, new obj should be nil") + retry.CheckRetryObjectMultipleFieldsEventually( + key1, + fakeOvn.controller.retryEgressNodes, + gomega.Not(gomega.BeNil()), // oldObj should not be nil + gomega.BeNil(), // newObj should be nil + ) + + key2 := node2.Name + ginkgo.By("retry entry: old obj should be nil, new obj should not be nil, config should not be nil") + retry.CheckRetryObjectMultipleFieldsEventually( + key2, + fakeOvn.controller.retryEgressNodes, + gomega.BeNil(), // oldObj should be nil + gomega.Not(gomega.BeNil()), // newObj should not be nil + gomega.Not(gomega.BeNil()), // config should not be nil + ) + fakeOvn.patchEgressIPObj(node2Name, egressIP) + connCtx, cancel := context.WithTimeout(context.Background(), types.OVSDBTimeout) + defer cancel() + resetNBClient(connCtx, fakeOvn.controller.nbClient) + retry.SetRetryObjWithNoBackoff(key1, fakeOvn.controller.retryEgressNodes) + retry.SetRetryObjWithNoBackoff(key2, fakeOvn.controller.retryEgressNodes) + fakeOvn.controller.retryEgressNodes.RequestRetryObjs() + // check the cache no longer has the entry + retry.CheckRetryObjectEventually(key1, false, fakeOvn.controller.retryEgressNodes) + retry.CheckRetryObjectEventually(key2, false, fakeOvn.controller.retryEgressNodes) + + i, n, _ := net.ParseCIDR(podV4IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Create(context.TODO(), &egressPod, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - expectedNatLogicalPort := "k8s-node2" - expectedDatabaseState := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "default-no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv4, - ExternalIDs: map[string]string{ - "name": eIP.Name, + expectedNatLogicalPort := "k8s-node2" + expectedDatabaseState := []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + 
Match: fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: node2LogicalRouterIPv4, + ExternalIDs: map[string]string{ + "name": eIP.Name, + }, + UUID: "reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "default-no-reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "(ip4.src == $a4548040316634674295 || ip4.src == $a13607449821398607916) && ip4.dst == $a14918748166599097711", + Action: nbdb.LogicalRouterPolicyActionAllow, + Options: map[string]string{"pkt_mark": "1008"}, + UUID: "no-reroute-node-UUID", + }, + &nbdb.NAT{ + UUID: "egressip-nat-UUID", + LogicalIP: podV4IP, + ExternalIP: egressIP, + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort, + Options: map[string]string{ + "stateless": "false", + }, }, - UUID: "reroute-UUID", - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID", - LogicalIP: podV4IP, - ExternalIP: egressIP, - ExternalIDs: map[string]string{ - "name": egressIPName, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort, - Options: map[string]string{ - "stateless": "false", + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Nat: []string{"egressip-nat-UUID"}, }, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Nat: []string{"egressip-nat-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID", "no-reroute-node-UUID"}, }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + 
types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{node2LogicalRouterIfAddrV4}, }, - }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + /* GARP is deleted since node has been deleted */ + // "nat-addresses": "router", + // "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + } + + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + return nil } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - return nil - } + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + ginkgotable.Entry("interconnect disabled", false), + ginkgotable.Entry("interconnect enabled", true), // all 3 nodes in same zone, so behaves like non-ic + ) + + ginkgotable.DescribeTable("should perform proper OVN transactions when namespace and pod is created after node egress label switch", + func(interconnect bool, node1Zone, node2Zone string) { + config.OVNKubernetesFeature.EnableInterconnect = interconnect + app.Action = func(ctx *cli.Context) error { + + egressIP := "192.168.126.101" + node1IPv4 := "192.168.126.202/24" + node2IPv4 := "192.168.126.51/24" + + egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) + egressNamespace := newNamespace(namespace) + + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/node-transit-switch-port-ifaddr": "{\"ipv4\":\"168.254.0.2/16\"}", // used only for ic=true test + "k8s.ovn.org/zone-name": node1Zone, // used only for ic=true test + } + labels := map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + node1 := getNodeObj(node1Name, annotations, labels) + annotations = map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/node-transit-switch-port-ifaddr": "{\"ipv4\":\"168.254.0.3/16\"}", // used only for ic=true test + 
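// when interconnect is enabled and node1/node2 are in different zones, this transit switch IP is used as the pod's reroute nexthop instead of node2's join switch address +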
"k8s.ovn.org/zone-name": node2Zone, // used only for ic=true test + } + labels = map[string]string{} + node2 := getNodeObj(node2Name, annotations, labels) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{node2LogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Nat: nil, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, + }, + }, + }, + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1, node2}, + }) - ginkgo.It("using EgressNode retry should re-assign EgressIPs and perform proper OVN transactions when pod is created after node egress label switch", func() { - app.Action = func(ctx *cli.Context) error { + i, n, _ := net.ParseCIDR(podV4IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - egressIP := "192.168.126.101" - node1IPv4 := "192.168.126.202/24" - node2IPv4 := "192.168.126.51/24" + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) - egressNamespace := newNamespace(namespace) + fakeOvn.patchEgressIPObj(node1Name, egressIP) - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": 
\"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + lsp := &nbdb.LogicalSwitchPort{Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name} + fakeOvn.controller.nbClient.Get(context.Background(), lsp) + if node1Zone == "global" { + // GARP is configured only for nodes in local zones, the master of the remote zone will do it for the remote nodes + gomega.Eventually(lsp.Options["nat-addresses"]).Should(gomega.Equal("router")) + gomega.Eventually(lsp.Options["exclude-lb-vips-from-garp"]).Should(gomega.Equal("true")) + } + + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + node1.Labels = map[string]string{} + node2.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeOvn.patchEgressIPObj(node2Name, egressIP) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + gomega.Eventually(nodeSwitch).Should(gomega.Equal(node2.Name)) + egressIPs, _ = getEgressIPStatus(egressIPName) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Namespaces().Create(context.TODO(), egressNamespace, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Create(context.TODO(), &egressPod, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + expectedNatLogicalPort := "k8s-node2" + reroutePolicyNextHop := node2LogicalRouterIPv4 + if interconnect && node1Zone != node2Zone { + reroutePolicyNextHop = []string{"168.254.0.3"} // node2's transit switch portIP + } + expectedDatabaseState := []libovsdbtest.TestData{ + getReRoutePolicy(egressPod.Status.PodIP, "4", reroutePolicyNextHop), + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "default-no-reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", }, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", }, - }, - } - node2 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node2Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", 
node2IPv4, ""), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"}, }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, }, - }, - } - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - PodSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{node2LogicalRouterIfAddrV4}, }, - NamespaceSelector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - "name": egressNamespace.Name, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, }, }, - }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{}, - }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, + }, + } + if node1Zone == "global" { + // GARP is configured only for nodes in local zones, the master of the remote zone will do it for the remote nodes + expectedDatabaseState[8].(*nbdb.LogicalSwitchPort).Options["nat-addresses"] = "router" + expectedDatabaseState[8].(*nbdb.LogicalSwitchPort).Options["exclude-lb-vips-from-garp"] = "true" + } + if node2Zone != "remote" { + // either not interconnect or egressNode is in localZone + eipSNAT := getEIPSNAT(podV4IP, egressIP, expectedNatLogicalPort) + expectedDatabaseState = append(expectedDatabaseState, eipSNAT) + expectedDatabaseState[4].(*nbdb.LogicalRouter).Nat = []string{"egressip-nat-UUID"} // 4th item is node2's GR + // add GARP config only if node is in local zone + expectedDatabaseState[9].(*nbdb.LogicalSwitchPort).Options["nat-addresses"] = "router" + expectedDatabaseState[9].(*nbdb.LogicalSwitchPort).Options["exclude-lb-vips-from-garp"] = "true" + } + if node2Zone != node1Zone && node2Zone == "global" { + // node2 will have a static route added for the pod + expectedDatabaseState = append(expectedDatabaseState, getReRouteStaticRoute(podV4IP, node2LogicalRouterIPv4[0])) + expectedDatabaseState[5].(*nbdb.LogicalRouter).StaticRoutes = []string{"reroute-static-route-UUID"} + expectedDatabaseState[5].(*nbdb.LogicalRouter).Policies = []string{"default-no-reroute-UUID", "no-reroute-service-UUID"} + expectedDatabaseState = expectedDatabaseState[1:] // reroute logical router policy is in remote zone, hence not visible + } + + gomega.Eventually(fakeOvn.nbClient, 
inspectTimeout).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + return nil } - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + ginkgotable.Entry("interconnect disabled; non-ic - single zone setup", false, "global", "global"), + ginkgotable.Entry("interconnect enabled; node1 and node2 in global zones", true, "global", "global"), + // will showcase localzone setup - master is in pod's zone where pod's reroute policy towards egressNode will be done. + // NOTE: SNAT won't be visible because its in remote zone + ginkgotable.Entry("interconnect enabled; node1 in global and node2 in remote zones", true, "global", "remote"), + // will showcase localzone setup - master is in egress node's zone where pod's SNAT policy and static route will be done. 
+ // NOTE: reroute policy won't be visible because its in remote zone (pod is in remote zone) + ginkgotable.Entry("interconnect enabled; node1 in remote and node2 in global zones", true, "remote", "global"), + ) + }) + + ginkgo.Context("On node DELETE", func() { + + ginkgotable.DescribeTable("should perform proper OVN transactions when node's gateway objects are already deleted", + func(interconnect bool, node1Zone, node2Zone string) { + config.OVNKubernetesFeature.EnableInterconnect = interconnect + app.Action = func(ctx *cli.Context) error { + + egressIP := "192.168.126.101" + node1IPv4 := "192.168.126.202/24" + node2IPv4 := "192.168.126.51/24" + + egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) + egressNamespace := newNamespace(namespace) + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/node-transit-switch-port-ifaddr": "{\"ipv4\":\"168.254.0.2/16\"}", // used only for ic=true test + "k8s.ovn.org/zone-name": node1Zone, + } + labels := map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + node1 := getNodeObj(node1Name, annotations, labels) + annotations = map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/node-transit-switch-port-ifaddr": "{\"ipv4\":\"168.254.0.3/16\"}", // used only for ic=true test + "k8s.ovn.org/zone-name": node2Zone, // used only for ic=true test + } + labels = map[string]string{} + node2 := getNodeObj(node2Name, annotations, labels) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, }, }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{node2LogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + }, + &nbdb.LogicalSwitchPort{ + UUID: 
types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, + }, + &nbdb.LogicalSwitch{ + UUID: types.OVNJoinSwitch + "-UUID", + Name: types.OVNJoinSwitch, }, }, }, - }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, - }, - &v1.NodeList{ - Items: []v1.Node{node1, node2}, - }, - &v1.NamespaceList{ - Items: []v1.Namespace{*egressNamespace}, - }) + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1, node2}, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{*egressNamespace}, + }) - err := fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressNodes() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) - gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeFalse()) + fakeOvn.patchEgressIPObj(node1Name, egressIP) - lsp := &nbdb.LogicalSwitchPort{Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name} - err = fakeOvn.controller.nbClient.Get(context.Background(), lsp) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(lsp.Options["nat-addresses"]).Should(gomega.Equal("router")) - gomega.Eventually(lsp.Options["exclude-lb-vips-from-garp"]).Should(gomega.Equal("true")) + lsp := &nbdb.LogicalSwitchPort{Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name} + fakeOvn.controller.nbClient.Get(context.Background(), lsp) + if node1Zone == "global" { + // only if node is global we add this + gomega.Eventually(lsp.Options["nat-addresses"]).Should(gomega.Equal("router")) + gomega.Eventually(lsp.Options["exclude-lb-vips-from-garp"]).Should(gomega.Equal("true")) + } - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + 
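// node1 is the only node labeled egress-assignable at this point, so the EgressIP must be assigned to it +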
gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) - node1.Labels = map[string]string{} - node2.Labels = map[string]string{ - "k8s.ovn.org/egress-assignable": "", - } + node2.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ginkgo.By("Bringing down NBDB") - // inject transient problem, nbdb is down - fakeOvn.controller.nbClient.Close() - gomega.Eventually(func() bool { - return fakeOvn.controller.nbClient.Connected() - }).Should(gomega.BeFalse()) - err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Delete(context.TODO(), node1.Name, metav1.DeleteOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // sleep long enough for TransactWithRetry to fail, causing egressnode operations to fail - // there is a chance that both egressnode events(node1 removal and node2 update) will end up in the same event queue - // sleep for double the time to allow for two consecutive TransactWithRetry timeouts - time.Sleep(2 * (types.OVSDBTimeout + time.Second)) - // check to see if the retry cache has an entry - key1 := node1.Name - ginkgo.By("retry entry: old obj should not be nil, new obj should be nil") - retry.CheckRetryObjectMultipleFieldsEventually( - key1, - fakeOvn.controller.retryEgressNodes, - gomega.Not(gomega.BeNil()), // oldObj should not be nil - gomega.BeNil(), // newObj should be nil - ) + i, n, _ := net.ParseCIDR(podV4IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Create(context.TODO(), &egressPod, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - key2 := node2.Name - ginkgo.By("retry entry: old obj should be nil, new obj should not be nil, config should not be nil") - retry.CheckRetryObjectMultipleFieldsEventually( - key2, - fakeOvn.controller.retryEgressNodes, - gomega.BeNil(), // oldObj should be nil - gomega.Not(gomega.BeNil()), // newObj should not be nil - gomega.Not(gomega.BeNil()), // config should not be nil - ) + expectedNatLogicalPort := "k8s-node1" + primarySNAT := getEIPSNAT(podV4IP, egressIP, expectedNatLogicalPort) + primarySNAT.UUID = "egressip-nat1-UUID" + expectedDatabaseState := []libovsdbtest.TestData{ + getReRoutePolicy(egressPod.Status.PodIP, "4", nodeLogicalRouterIPv4), + primarySNAT, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "default-no-reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Nat: []string{"egressip-nat1-UUID"}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Nat: []string{}, + }, + 
&nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{node2LogicalRouterIfAddrV4}, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, + }, + &nbdb.LogicalSwitch{ + UUID: types.OVNJoinSwitch + "-UUID", + Name: types.OVNJoinSwitch, + }, + } + if node2Zone != "remote" { + // add GARP config only if node is in local zone + expectedDatabaseState[10].(*nbdb.LogicalSwitchPort).Options["nat-addresses"] = "router" + expectedDatabaseState[10].(*nbdb.LogicalSwitchPort).Options["exclude-lb-vips-from-garp"] = "true" + } + if node1Zone != "remote" { + // add GARP config only if node is in local zone + expectedDatabaseState[9].(*nbdb.LogicalSwitchPort).Options["nat-addresses"] = "router" + expectedDatabaseState[9].(*nbdb.LogicalSwitchPort).Options["exclude-lb-vips-from-garp"] = "true" + } else { + // if node1 where the pod lives is remote we can't see the EIP setup done since master belongs to local zone + expectedDatabaseState[4].(*nbdb.LogicalRouter).Nat = []string{} + expectedDatabaseState[6].(*nbdb.LogicalRouter).Policies = []string{"default-no-reroute-UUID", "no-reroute-service-UUID"} + expectedDatabaseState = expectedDatabaseState[2:] + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - connCtx, cancel := context.WithTimeout(context.Background(), types.OVSDBTimeout) - defer cancel() - resetNBClient(connCtx, fakeOvn.controller.nbClient) - retry.SetRetryObjWithNoBackoff(key1, fakeOvn.controller.retryEgressNodes) - retry.SetRetryObjWithNoBackoff(key2, fakeOvn.controller.retryEgressNodes) - fakeOvn.controller.retryEgressNodes.RequestRetryObjs() - // check the cache no longer has the entry - retry.CheckRetryObjectEventually(key1, false, fakeOvn.controller.retryEgressNodes) - retry.CheckRetryObjectEventually(key2, false, fakeOvn.controller.retryEgressNodes) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - gomega.Eventually(nodeSwitch).Should(gomega.Equal(node2.Name)) - egressIPs, _ = getEgressIPStatus(egressIPName) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + err = fakeOvn.controller.gatewayCleanup(node1Name) // simulate an already deleted node + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - i, n, _ := net.ParseCIDR(podV4IP + "/23") - n.IP = i - fakeOvn.controller.logicalPortCache.add(&egressPod, "", 
types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Create(context.TODO(), &egressPod, metav1.CreateOptions{}) + err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Delete(context.TODO(), node1Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - expectedNatLogicalPort := "k8s-node2" - expectedDatabaseState := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "default-no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv4, - ExternalIDs: map[string]string{ - "name": eIP.Name, + // NOTE: This test checks if plumbing is removed when node is gone but pod on the node is still present (unusual scenario) + // Thus we need to check the cache state to verify things in unit tests to avoid races - we don't control the order of + // node's deletion removing the entry from localZonesCache versus the add happening for the pod. + // (in real env this won't be a problem since eventually things will reconcile as pod will also be gone if node is gone) + gomega.Eventually(func() bool { + _, ok := fakeOvn.controller.eIPC.nodeZoneState.Load(egressPod.Spec.NodeName) + return ok + }).Should(gomega.BeFalse()) + + // W0608 12:53:33.728205 1161455 egressip.go:2030] Unable to retrieve gateway IP for node: node1, protocol is IPv6: false, err: attempt at finding node gateway router network information failed, err: unable to find router port rtoj-GR_node1: object not found + // 2023-04-25T11:01:13.2804834Z W0425 11:01:13.280407 21055 egressip.go:2036] Unable to fetch transit switch IP for node: node1: err: failed to get node node1: node "node1" not found + fakeOvn.patchEgressIPObj(node2Name, egressIP) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + gomega.Eventually(nodeSwitch).Should(gomega.Equal(node2.Name)) // egressIP successfully reassigned to node2 + egressIPs, _ = getEgressIPStatus(egressIPName) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + expectedNatLogicalPort = "k8s-node2" + eipSNAT := getEIPSNAT(podV4IP, egressIP, expectedNatLogicalPort) + expectedDatabaseState = []libovsdbtest.TestData{ + getReRoutePolicy(egressPod.Status.PodIP, "4", node2LogicalRouterIPv4), + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "default-no-reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", }, - UUID: "reroute-UUID", - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID", - LogicalIP: podV4IP, - ExternalIP: egressIP, - ExternalIDs: map[string]string{ - "name": egressIPName, + &nbdb.LogicalRouter{ + Name: 
ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort, - Options: map[string]string{ - "stateless": "false", + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"}, }, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Nat: []string{"egressip-nat-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{node2LogicalRouterIfAddrV4}, }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, }, - }, - } + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, + }, + &nbdb.LogicalSwitch{ + UUID: types.OVNJoinSwitch + "-UUID", + Name: types.OVNJoinSwitch, + }, + } + if node2Zone != "remote" { + // either not interconnect or egressNode is in localZone + expectedDatabaseState = append(expectedDatabaseState, eipSNAT) + expectedDatabaseState[3].(*nbdb.LogicalRouter).Nat = []string{"egressip-nat-UUID"} // 4th item is node2's GR + // add GARP config only if node is in local zone + expectedDatabaseState[7].(*nbdb.LogicalSwitchPort).Options["nat-addresses"] = "router" + 
expectedDatabaseState[7].(*nbdb.LogicalSwitchPort).Options["exclude-lb-vips-from-garp"] = "true" + } + if node1Zone == "global" { + // even if node1's GR is gone already, in the libovsdb test framework we need to explicitly remove the NATs from GR. + // This won't be a problem in real env, See https://github.com/ovn-org/libovsdb/issues/338 for details. + // Hence we will be left with a stale SNAT + // in non-IC setup, this is not dependent on localZoneNodes cache; we nuke all SNATs for this pod on all nodes + // hence we need to do this only when its IC and pod is in local zone + expectedDatabaseState = append(expectedDatabaseState, primarySNAT) + } + // all cases: reroute logical router policy is gone and won't be recreated since node1 is deleted - that is where the pod lives + // NOTE: This test is not really a real scenario, it depicts a transient state. + expectedDatabaseState[4].(*nbdb.LogicalRouter).Policies = []string{"default-no-reroute-UUID", "no-reroute-service-UUID"} + expectedDatabaseState = expectedDatabaseState[1:] - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - return nil - } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + return nil + } - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + ginkgotable.Entry("interconnect disabled; non-ic - single zone setup", false, "global", "global"), + ginkgotable.Entry("interconnect enabled; node1 and node2 in global zones", true, "global", "global"), + // will showcase localzone setup - master is in pod's zone where pod's reroute policy towards egressNode will be done. + // NOTE: SNAT won't be visible because its in remote zone + ginkgotable.Entry("interconnect enabled; node1 in global and node2 in remote zones", true, "global", "remote"), + // will showcase localzone setup - master is in egress node's zone where pod's SNAT policy and static route will* be done. 
+ // * the static route won't be visible because the pod's node node1 is getting deleted in this test + // NOTE: reroute policy won't be visible because its in remote zone (pod is in remote zone) + ginkgotable.Entry("interconnect enabled; node1 in remote and node2 in global zones", true, "remote", "global"), + ) + }) - ginkgo.It("should re-assign EgressIPs and perform proper OVN transactions when namespace and pod is created after node egress label switch", func() { - app.Action = func(ctx *cli.Context) error { + ginkgo.Context("IPv6 on pod UPDATE", func() { - egressIP := "192.168.126.101" - node1IPv4 := "192.168.126.202/24" - node2IPv4 := "192.168.126.51/24" + ginkgotable.DescribeTable("should remove OVN pod egress setup when EgressIP stops matching pod label", + func(interconnect, isnode1Local, isnode2Local bool) { + config.OVNKubernetesFeature.EnableInterconnect = interconnect + app.Action = func(ctx *cli.Context) error { - egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) - egressNamespace := newNamespace(namespace) + egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - }, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, + egressPod := *newPodWithLabels(namespace, podName, node1Name, podV6IP, egressPodLabel) + egressNamespace := newNamespace(namespace) + node2IPv4 := "192.168.126.202/24" + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v6NodeSubnet), + "k8s.ovn.org/node-transit-switch-port-ifaddr": "{\"ipv4\":\"168.254.0.2/16\", \"ipv6\": \"fd97::2/64\"}", // used only for ic=true test + } + node2 := getNodeObj(node2Name, annotations, map[string]string{}) // add node to avoid errori-ing out on transit switch IP fetch + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: nil, + }, }, }, - }, - } - node2 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node2Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + &v1.NamespaceList{ + Items: []v1.Namespace{*egressNamespace}, }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, + &v1.PodList{ + Items: []v1.Pod{egressPod}, }, - }, - 
} - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - PodSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, + &v1.NodeList{ + Items: []v1.Node{node2}, }, - NamespaceSelector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - "name": egressNamespace.Name, + ) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{ + egressIP.String(), + }, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, }, }, - }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{}, - }, - } + } - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, + i, n, _ := net.ParseCIDR(podV6IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // hack pod to be in the provided zone + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, isnode1Local) + fakeOvn.controller.eIPC.nodeZoneState.Store(node2Name, isnode2Local) + if isnode1Local { + fakeOvn.controller.localZoneNodes.Store(node1Name, true) + } + if isnode2Local { + fakeOvn.controller.localZoneNodes.Store(node2Name, true) + } + + _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeOvn.patchEgressIPObj(node2Name, egressIP.String()) + + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + + expectedNatLogicalPort := "k8s-node2" + expectedDatabaseState := []libovsdbtest.TestData{ + getReRoutePolicy(egressPod.Status.PodIP, "6", nodeLogicalRouterIPv6), + getEIPSNAT(podV6IP, egressIP.String(), expectedNatLogicalPort), + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"reroute-UUID"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: []string{"egressip-nat-UUID"}, + }, + } + if !isnode1Local { + // case4: egressNode is in different zone than pod and egressNode is in local zone, so static reroute will be visible + expectedDatabaseState = append(expectedDatabaseState, getReRouteStaticRoute(egressPod.Status.PodIP, nodeLogicalRouterIPv6[0])) + expectedDatabaseState[2].(*nbdb.LogicalRouter).Policies = 
[]string{} + expectedDatabaseState[2].(*nbdb.LogicalRouter).StaticRoutes = []string{"reroute-static-route-UUID"} + expectedDatabaseState = expectedDatabaseState[1:] + } + if !isnode2Local { + // case3: pod's SNAT is not visible because egress node is remote + expectedDatabaseState[5].(*nbdb.LogicalRouter).Nat = []string{} + expectedDatabaseState = expectedDatabaseState[2:] + // add policy with nextHop towards egressNode's transit switchIP + expectedDatabaseState = append(expectedDatabaseState, getReRoutePolicy(egressPod.Status.PodIP, "6", []string{"fd97::2"})) + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + egressIPs, nodes := getEgressIPStatus(eIP.Name) + gomega.Expect(nodes[0]).To(gomega.Equal(node2Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) + + podUpdate := newPod(namespace, podName, node1Name, podV6IP) + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Update(context.TODO(), podUpdate, metav1.UpdateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + + expectedDatabaseState = []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: nil, + }, + } + + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + ginkgotable.Entry("interconnect disabled; non-ic - single zone setup", false, true, true), + ginkgotable.Entry("interconnect enabled; pod and egressnode are in local zone", true, true, true), + ginkgotable.Entry("interconnect enabled; pod is in local zone and egressnode is in remote zone", true, true, false), // snat won't be visible + ginkgotable.Entry("interconnect enabled; pod is in remote zone and egressnode is in local zone", true, false, true), + ) + ginkgotable.DescribeTable("egressIP pod retry should remove OVN pod egress setup when EgressIP stops matching pod label", + func(interconnect bool, podZone string) { + config.OVNKubernetesFeature.EnableInterconnect = interconnect + app.Action = func(ctx *cli.Context) error { + + egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") + + egressPod := *newPodWithLabels(namespace, podName, node1Name, podV6IP, egressPodLabel) + egressNamespace := newNamespace(namespace) + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: 
ovntypes.GWRouterPrefix + node1Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: nil, + }, + }, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{*egressNamespace}, + }, + &v1.PodList{ + Items: []v1.Pod{egressPod}, + }, + ) + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{ + egressIP.String(), }, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + } + + i, n, _ := net.ParseCIDR(podV6IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + // hack pod to be in the provided zone + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, true) + fakeOvn.controller.eIPC.nodeZoneState.Store(node2Name, true) + if podZone == "remote" { + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, false) + } + + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeOvn.patchEgressIPObj(node2Name, egressIP.String()) + + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + + expectedNatLogicalPort := "k8s-node2" + expectedDatabaseState := []libovsdbtest.TestData{ + getReRoutePolicy(egressPod.Status.PodIP, "6", nodeLogicalRouterIPv6), + getEIPSNAT(podV6IP, egressIP.String(), expectedNatLogicalPort), + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"reroute-UUID"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: []string{"egressip-nat-UUID"}, + }, + } + if podZone == "remote" { + // egressNode is in different zone than pod and egressNode is in local zone, so static reroute will be visible + expectedDatabaseState = append(expectedDatabaseState, getReRouteStaticRoute(egressPod.Status.PodIP, nodeLogicalRouterIPv6[0])) + expectedDatabaseState[2].(*nbdb.LogicalRouter).Policies = []string{} + expectedDatabaseState[2].(*nbdb.LogicalRouter).StaticRoutes = []string{"reroute-static-route-UUID"} + expectedDatabaseState = expectedDatabaseState[1:] + } + + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + egressIPs, nodes := getEgressIPStatus(eIP.Name) + gomega.Expect(nodes[0]).To(gomega.Equal(node2Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) + + podUpdate := newPod(namespace, podName, node1Name, podV6IP) + ginkgo.By("Bringing down NBDB") + // inject transient problem, nbdb is down + 
fakeOvn.controller.nbClient.Close() + gomega.Eventually(func() bool { + return fakeOvn.controller.nbClient.Connected() + }).Should(gomega.BeFalse()) + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Update(context.TODO(), podUpdate, metav1.UpdateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + time.Sleep(types.OVSDBTimeout + time.Second) + // check to see if the retry cache has an entry + var key string + key, err = retry.GetResourceKey(podUpdate) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + ginkgo.By("retry entry: new obj should not be nil, config should not be nil") + retry.CheckRetryObjectMultipleFieldsEventually( + key, + fakeOvn.controller.retryEgressIPPods, + gomega.BeNil(), // oldObj should be nil + gomega.Not(gomega.BeNil()), // newObj should not be nil + gomega.Not(gomega.BeNil()), // config should not be nil + ) + + connCtx, cancel := context.WithTimeout(context.Background(), types.OVSDBTimeout) + defer cancel() + resetNBClient(connCtx, fakeOvn.controller.nbClient) + + retry.SetRetryObjWithNoBackoff(key, fakeOvn.controller.retryEgressIPPods) + fakeOvn.controller.retryEgressIPPods.RequestRetryObjs() + // check the cache no longer has the entry + retry.CheckRetryObjectEventually(key, false, fakeOvn.controller.retryEgressIPPods) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + ginkgotable.Entry("interconnect disabled; non-ic - single zone setup", false, "global"), + ginkgotable.Entry("interconnect enabled; pod is in global zone", true, "global"), + ginkgotable.Entry("interconnect enabled; pod is in remote zone", true, "remote"), // static re-route is visible but reroute policy won't be + ) + + ginkgo.It("should not treat pod update if pod already had assigned IP when it got the ADD", func() { + app.Action = func(ctx *cli.Context) error { + + egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") + + egressPod := *newPodWithLabels(namespace, podName, node1Name, podV6IP, egressPodLabel) + egressNamespace := newNamespace(namespace) + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, }, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Nat: nil, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: 
types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", }, }, }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, + &v1.NamespaceList{ + Items: []v1.Namespace{*egressNamespace}, }, - &v1.NodeList{ - Items: []v1.Node{node1, node2}, - }) + &v1.PodList{ + Items: []v1.Pod{egressPod}, + }, + ) - i, n, _ := net.ParseCIDR(podV4IP + "/23") + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{ + egressIP.String(), + }, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + } + + i, n, _ := net.ParseCIDR(podV6IP + "/23") n.IP = i fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) @@ -935,75 +1668,44 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressNodes() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) - gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeFalse()) - - lsp := &nbdb.LogicalSwitchPort{Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name} - fakeOvn.controller.nbClient.Get(context.Background(), lsp) - gomega.Eventually(lsp.Options["nat-addresses"]).Should(gomega.Equal("router")) - gomega.Eventually(lsp.Options["exclude-lb-vips-from-garp"]).Should(gomega.Equal("true")) - err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, true) + fakeOvn.controller.eIPC.nodeZoneState.Store(node2Name, true) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) - - node1.Labels = map[string]string{} - node2.Labels = map[string]string{ - "k8s.ovn.org/egress-assignable": "", - } - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) + _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - gomega.Eventually(nodeSwitch).Should(gomega.Equal(node2.Name)) - egressIPs, _ = getEgressIPStatus(egressIPName) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + fakeOvn.patchEgressIPObj(node2Name, egressIP.String()) - _, err = 
fakeOvn.fakeClient.KubeClient.CoreV1().Namespaces().Create(context.TODO(), egressNamespace, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Create(context.TODO(), &egressPod, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) expectedNatLogicalPort := "k8s-node2" expectedDatabaseState := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "default-no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, &nbdb.LogicalRouterPolicy{ Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP), + Match: fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv4, + Nexthops: nodeLogicalRouterIPv6, ExternalIDs: map[string]string{ "name": eIP.Name, }, UUID: "reroute-UUID", }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"reroute-UUID"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, + }, &nbdb.NAT{ UUID: "egressip-nat-UUID", - LogicalIP: podV4IP, - ExternalIP: egressIP, + LogicalIP: podV6IP, + ExternalIP: egressIP.String(), ExternalIDs: map[string]string{ "name": egressIPName, }, @@ -1014,50 +1716,31 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", Nat: []string{"egressip-nat-UUID"}, }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": 
types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + egressIPs, nodes := getEgressIPStatus(eIP.Name) + gomega.Expect(nodes[0]).To(gomega.Equal(node2Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) - gomega.Eventually(fakeOvn.nbClient, inspectTimeout).Should(libovsdbtest.HaveData(expectedDatabaseState)) + podUpdate := newPodWithLabels(namespace, podName, node1Name, podV6IP, map[string]string{ + "egress": "needed", + "some": "update", + }) + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Update(context.TODO(), podUpdate, metav1.UpdateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) return nil } @@ -1206,58 +1889,164 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { ginkgo.Context("On node DELETE", func() { - ginkgo.It("should re-assign EgressIPs and perform proper OVN transactions when node's gateway objects are already deleted", func() { - app.Action = func(ctx *cli.Context) error { + ginkgotable.DescribeTable("should treat pod update if pod did not have an assigned IP when it got the ADD", + func(interconnect bool, podZone string) { + config.OVNKubernetesFeature.EnableInterconnect = interconnect + app.Action = func(ctx *cli.Context) error { - egressIP := "192.168.126.101" - node1IPv4 := "192.168.126.202/24" - node2IPv4 := "192.168.126.51/24" + egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") - egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) - egressNamespace := newNamespace(namespace) + egressPod := *newPodWithLabels(namespace, podName, node1Name, "", egressPodLabel) + egressNamespace := newNamespace(namespace) - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: nil, + }, + }, }, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", + &v1.NamespaceList{ + Items: []v1.Namespace{*egressNamespace}, }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - 
Type: v1.NodeReady, - Status: v1.ConditionTrue, + &v1.PodList{ + Items: []v1.Pod{egressPod}, + }, + ) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{ + egressIP.String(), + }, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, }, }, - }, - } - node2 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node2Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + } + + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeOvn.patchEgressIPObj(node2Name, egressIP.String()) + + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + + egressIPs, nodes := getEgressIPStatus(eIP.Name) + gomega.Expect(nodes[0]).To(gomega.Equal(node2Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) + + podUpdate := newPodWithLabels(namespace, podName, node1Name, podV6IP, egressPodLabel) + podUpdate.Annotations = map[string]string{ + "k8s.ovn.org/pod-networks": fmt.Sprintf("{\"default\":{\"ip_addresses\":[\"%s/23\"],\"mac_address\":\"0a:58:0a:83:00:0f\",\"gateway_ips\":[\"%s\"],\"ip_address\":\"%s/23\",\"gateway_ip\":\"%s\"}}", podV6IP, v6GatewayIP, podV6IP, v6GatewayIP), + } + i, n, _ := net.ParseCIDR(podV6IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + // hack pod to be in the provided zone + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, true) + fakeOvn.controller.eIPC.nodeZoneState.Store(node2Name, true) + if podZone == "remote" { + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, false) + } + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Update(context.TODO(), podUpdate, metav1.UpdateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + + expectedNatLogicalPort := "k8s-node2" + expectedDatabaseState := []libovsdbtest.TestData{ + getReRoutePolicy(podV6IP, "6", nodeLogicalRouterIPv6), + getEIPSNAT(podV6IP, egressIP.String(), expectedNatLogicalPort), + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"reroute-UUID"}, }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, }, - }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: 
ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: []string{"egressip-nat-UUID"}, + }, + } + if podZone == "remote" { + // egressNode is in different zone than pod and egressNode is in local zone, so static reroute will be visible + expectedDatabaseState = append(expectedDatabaseState, getReRouteStaticRoute(podV6IP, nodeLogicalRouterIPv6[0])) + expectedDatabaseState[2].(*nbdb.LogicalRouter).Policies = []string{} + expectedDatabaseState[2].(*nbdb.LogicalRouter).StaticRoutes = []string{"reroute-static-route-UUID"} + expectedDatabaseState = expectedDatabaseState[1:] + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + return nil } + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + ginkgotable.Entry("interconnect disabled; non-ic - single zone setup", false, "global"), + ginkgotable.Entry("interconnect enabled; pod is in global zone", true, "global"), + ginkgotable.Entry("interconnect enabled; pod is in remote zone", true, "remote"), // static re-route is visible but reroute policy won't be + ) + + ginkgo.It("should not treat pod DELETE if pod did not have an assigned IP when it got the ADD and we receive a DELETE before the IP UPDATE", func() { + app.Action = func(ctx *cli.Context) error { + + egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") + + egressPod := *newPodWithLabels(namespace, podName, node1Name, "", egressPodLabel) + egressNamespace := newNamespace(namespace) + fakeOvn.startWithDBSetup(clusterRouterDbSetup, + &v1.NamespaceList{ + Items: []v1.Namespace{*egressNamespace}, + }, + &v1.PodList{ + Items: []v1.Pod{egressPod}, + }, + ) eIP := egressipv1.EgressIP{ ObjectMeta: newEgressIPMeta(egressIPName), Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, + EgressIPs: []string{ + egressIP.String(), + }, PodSelector: metav1.LabelSelector{ MatchLabels: egressPodLabel, }, @@ -1267,327 +2056,541 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, }, }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{}, - }, } - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{node2LogicalRouterIfAddrV4}, + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeOvn.patchEgressIPObj(node2Name, egressIP.String()) + + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + + egressIPs, nodes := getEgressIPStatus(eIP.Name) + gomega.Expect(nodes[0]).To(gomega.Equal(node2Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) + + err = 
fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Delete(context.TODO(), egressPod.Name, *metav1.NewDeleteOptions(0)) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) + + ginkgo.Context("IPv6 on namespace UPDATE", func() { + + ginkgotable.DescribeTable("should remove OVN pod egress setup when EgressIP is deleted", + func(interconnect bool, podZone string) { + config.OVNKubernetesFeature.EnableInterconnect = interconnect + app.Action = func(ctx *cli.Context) error { + + egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") + + egressPod := *newPodWithLabels(namespace, podName, node1Name, podV6IP, egressPodLabel) + egressNamespace := newNamespaceWithLabels(namespace, egressPodLabel) + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: nil, + }, }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + &v1.NamespaceList{ + Items: []v1.Namespace{*egressNamespace}, + }, + &v1.PodList{ + Items: []v1.Pod{egressPod}, + }, + ) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{ + egressIP.String(), }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + } + + i, n, _ := net.ParseCIDR(podV6IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + // hack pod to be in the provided zone + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, true) + fakeOvn.controller.eIPC.nodeZoneState.Store(node2Name, true) + if podZone == "remote" { + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, false) + } + + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, 
metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeOvn.patchEgressIPObj(node2Name, egressIP.String()) + + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + + expectedNatLogicalPort := "k8s-node2" + expectedDatabaseState := []libovsdbtest.TestData{ + getReRoutePolicy(egressPod.Status.PodIP, "6", nodeLogicalRouterIPv6), + getEIPSNAT(podV6IP, egressIP.String(), expectedNatLogicalPort), + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"reroute-UUID"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: []string{"egressip-nat-UUID"}, + }, + } + if podZone == "remote" { + // egressNode is in different zone than pod and egressNode is in local zone, so static reroute will be visible + expectedDatabaseState = append(expectedDatabaseState, getReRouteStaticRoute(egressPod.Status.PodIP, nodeLogicalRouterIPv6[0])) + expectedDatabaseState[2].(*nbdb.LogicalRouter).Policies = []string{} + expectedDatabaseState[2].(*nbdb.LogicalRouter).StaticRoutes = []string{"reroute-static-route-UUID"} + expectedDatabaseState = expectedDatabaseState[1:] + } + + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + egressIPs, nodes := getEgressIPStatus(eIP.Name) + gomega.Expect(nodes[0]).To(gomega.Equal(node2Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) + + err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Delete(context.TODO(), eIP.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + expectedDatabaseState = []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: nil, + }, + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + ginkgotable.Entry("interconnect disabled; non-ic - single zone setup", false, "global"), + ginkgotable.Entry("interconnect enabled; pod is in global zone", true, "global"), + ginkgotable.Entry("interconnect enabled; pod is in remote zone", true, "remote"), + ) + + ginkgotable.DescribeTable("egressIP retry should remove OVN pod egress setup when EgressIP is deleted", + func(interconnect bool, podZone string) { + config.OVNKubernetesFeature.EnableInterconnect = interconnect + app.Action = func(ctx *cli.Context) error { + + egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") + + 
egressPod := *newPodWithLabels(namespace, podName, node1Name, podV6IP, egressPodLabel) + egressNamespace := newNamespaceWithLabels(namespace, egressPodLabel) + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: nil, + }, + }, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{*egressNamespace}, + }, + &v1.PodList{ + Items: []v1.Pod{egressPod}, + }, + ) + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{ + egressIP.String(), + }, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, }, - &nbdb.LogicalSwitch{ - UUID: types.OVNJoinSwitch + "-UUID", - Name: types.OVNJoinSwitch, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, }, }, - }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, - }, - &v1.NodeList{ - Items: []v1.Node{node1, node2}, - }, - &v1.NamespaceList{ - Items: []v1.Namespace{*egressNamespace}, - }) + } - err := fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressNodes() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() + i, n, _ := net.ParseCIDR(podV6IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) - gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeFalse()) + // hack pod to be in the provided zone + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, true) + fakeOvn.controller.eIPC.nodeZoneState.Store(node2Name, true) + if podZone == "remote" { + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, false) + } - lsp := &nbdb.LogicalSwitchPort{Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name} - fakeOvn.controller.nbClient.Get(context.Background(), lsp) - gomega.Eventually(lsp.Options["nat-addresses"]).Should(gomega.Equal("router")) - gomega.Eventually(lsp.Options["exclude-lb-vips-from-garp"]).Should(gomega.Equal("true")) + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err 
= fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + ginkgo.By("Bringing down NBDB") + // inject transient problem, nbdb is down + fakeOvn.controller.nbClient.Close() + gomega.Eventually(func() bool { + return fakeOvn.controller.nbClient.Connected() + }).Should(gomega.BeFalse()) - node2.Labels = map[string]string{ - "k8s.ovn.org/egress-assignable": "", - } - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - i, n, _ := net.ParseCIDR(podV4IP + "/23") - n.IP = i - fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Create(context.TODO(), &egressPod, metav1.CreateOptions{}) + fakeOvn.patchEgressIPObj(node2Name, egressIP.String()) - expectedNatLogicalPort := "k8s-node1" - expectedDatabaseState := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "default-no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv4, - ExternalIDs: map[string]string{ - "name": eIP.Name, + // sleep long enough for TransactWithRetry to fail, causing egressnode operations to fail + time.Sleep(types.OVSDBTimeout + time.Second) + // check to see if the retry cache has an entry + key, err := retry.GetResourceKey(&eIP) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + retry.CheckRetryObjectEventually(key, true, fakeOvn.controller.retryEgressIPs) + + connCtx, cancel := context.WithTimeout(context.Background(), types.OVSDBTimeout) + defer cancel() + resetNBClient(connCtx, fakeOvn.controller.nbClient) + retry.SetRetryObjWithNoBackoff(key, fakeOvn.controller.retryEgressIPs) + fakeOvn.controller.retryEgressIPs.RequestRetryObjs() + // check the cache no longer has the entry + retry.CheckRetryObjectEventually(key, false, fakeOvn.controller.retryEgressIPs) + + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + + expectedNatLogicalPort := "k8s-node2" + expectedDatabaseState := []libovsdbtest.TestData{ + getReRoutePolicy(egressPod.Status.PodIP, "6", nodeLogicalRouterIPv6), + getEIPSNAT(podV6IP, egressIP.String(), expectedNatLogicalPort), + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"reroute-UUID"}, }, - UUID: "reroute-UUID", - }, - &nbdb.NAT{ - UUID: 
"egressip-nat-UUID", - LogicalIP: podV4IP, - ExternalIP: egressIP, - ExternalIDs: map[string]string{ - "name": egressIPName, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort, - Options: map[string]string{ - "stateless": "false", + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", }, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Nat: []string{"egressip-nat-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Nat: []string{}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{node2LogicalRouterIfAddrV4}, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: []string{"egressip-nat-UUID"}, }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", + } + if podZone == "remote" { + // egressNode is in different zone than pod and egressNode is in local zone, so static reroute will be visible + expectedDatabaseState = append(expectedDatabaseState, getReRouteStaticRoute(egressPod.Status.PodIP, nodeLogicalRouterIPv6[0])) + expectedDatabaseState[2].(*nbdb.LogicalRouter).Policies = []string{} + expectedDatabaseState[2].(*nbdb.LogicalRouter).StaticRoutes = []string{"reroute-static-route-UUID"} + expectedDatabaseState = expectedDatabaseState[1:] + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + egressIPs, nodes := getEgressIPStatus(eIP.Name) + gomega.Expect(nodes[0]).To(gomega.Equal(node2Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) + + err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Delete(context.TODO(), eIP.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + expectedDatabaseState = 
[]libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", }, - }, - &nbdb.LogicalSwitch{ - UUID: types.OVNJoinSwitch + "-UUID", - Name: types.OVNJoinSwitch, - }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: nil, + }, + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + return nil } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - err = fakeOvn.controller.gatewayCleanup(node1Name) // simulate an already deleted node + err := app.Run([]string{app.Name}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + ginkgotable.Entry("interconnect disabled; non-ic - single zone setup", false, "global"), + ginkgotable.Entry("interconnect enabled; pod is in global zone", true, "global"), + ginkgotable.Entry("interconnect enabled; pod is in remote zone", true, "remote"), + ) + + ginkgotable.DescribeTable("should remove OVN pod egress setup when EgressIP stops matching", + func(interconnect bool, podZone string) { + config.OVNKubernetesFeature.EnableInterconnect = interconnect + app.Action = func(ctx *cli.Context) error { + + egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") + + egressPod := *newPodWithLabels(namespace, podName, node1Name, podV6IP, egressPodLabel) + egressNamespace := newNamespaceWithLabels(namespace, egressPodLabel) + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: nil, + }, + }, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{*egressNamespace}, + }, + &v1.PodList{ + Items: []v1.Pod{egressPod}, + }, + ) - err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Delete(context.TODO(), node1Name, metav1.DeleteOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + i, n, _ := net.ParseCIDR(podV6IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + // hack pod to be in the provided zone + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, true) + fakeOvn.controller.eIPC.nodeZoneState.Store(node2Name, true) + if podZone == "remote" { + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, false) + } - // E0608 12:53:33.728155 1161455 egressip.go:882] Allocator error: EgressIP: egressip claims to have an allocation on a node which is unassignable for egress IP: node1 - // W0608 12:53:33.728205 1161455 egressip.go:2030] Unable to 
retrieve gateway IP for node: node1, protocol is IPv6: false, err: attempt at finding node gateway router network information failed, err: unable to find router port rtoj-GR_node1: object not found - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - gomega.Eventually(nodeSwitch).Should(gomega.Equal(node2.Name)) // egressIP successfully reassigned to node2 - egressIPs, _ = getEgressIPStatus(egressIPName) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{ + egressIP.String(), + }, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + }, + } - expectedNatLogicalPort = "k8s-node2" - expectedDatabaseState = []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "default-no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: node2LogicalRouterIPv4, - ExternalIDs: map[string]string{ - "name": eIP.Name, + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeOvn.patchEgressIPObj(node2Name, egressIP.String()) + + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + + expectedNatLogicalPort := "k8s-node2" + expectedDatabaseState := []libovsdbtest.TestData{ + getReRoutePolicy(egressPod.Status.PodIP, "6", nodeLogicalRouterIPv6), + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"reroute-UUID"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, + }, + &nbdb.NAT{ + UUID: "egressip-nat-UUID", + LogicalIP: podV6IP, + ExternalIP: egressIP.String(), + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort, + Options: map[string]string{ + "stateless": "false", + }, }, - UUID: "reroute-UUID", - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID", - LogicalIP: podV4IP, - ExternalIP: egressIP, - ExternalIDs: map[string]string{ - "name": egressIPName, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort, - Options: map[string]string{ - "stateless": "false", + 
&nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: []string{"egressip-nat-UUID"}, }, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Nat: []string{"egressip-nat-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{node2LogicalRouterIfAddrV4}, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + } + if podZone == "remote" { + // pod is in remote zone, its LRP won't be visible + expectedDatabaseState = append(expectedDatabaseState, getReRouteStaticRoute(egressPod.Status.PodIP, nodeLogicalRouterIPv6[0])) + expectedDatabaseState[1].(*nbdb.LogicalRouter).StaticRoutes = []string{"reroute-static-route-UUID"} + expectedDatabaseState[1].(*nbdb.LogicalRouter).Policies = []string{} + expectedDatabaseState = expectedDatabaseState[1:] + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + egressIPs, nodes := getEgressIPStatus(eIP.Name) + gomega.Expect(nodes[0]).To(gomega.Equal(node2Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) + + namespaceUpdate := newNamespace(namespace) + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Namespaces().Update(context.TODO(), namespaceUpdate, metav1.UpdateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + + expectedDatabaseState = []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, }, - }, - &nbdb.LogicalSwitch{ - UUID: types.OVNJoinSwitch + "-UUID", - Name: types.OVNJoinSwitch, - }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: nil, + }, + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + return nil } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - return nil - } - - err := app.Run([]string{app.Name}) - 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - }) - - ginkgo.Context("IPv6 on pod UPDATE", func() { + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + ginkgotable.Entry("interconnect disabled; non-ic - single zone setup", false, "global"), + ginkgotable.Entry("interconnect enabled; pod is in global zone", true, "global"), + ginkgotable.Entry("interconnect enabled; pod is in remote zone", true, "remote"), + ) - ginkgo.It("should remove OVN pod egress setup when EgressIP stops matching pod label", func() { + ginkgo.It("should not remove OVN pod egress setup when EgressIP stops matching, but pod never had any IP to begin with", func() { app.Action = func(ctx *cli.Context) error { egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") - egressPod := *newPodWithLabels(namespace, podName, node1Name, podV6IP, egressPodLabel) - egressNamespace := newNamespace(namespace) - - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) - - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, - Networks: []string{nodeLogicalRouterIfAddrV6}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", - Nat: nil, - }, - }, - }, + egressPod := *newPodWithLabels(namespace, podName, node1Name, "", egressPodLabel) + egressNamespace := newNamespaceWithLabels(namespace, egressPodLabel) + fakeOvn.startWithDBSetup(clusterRouterDbSetup, &v1.NamespaceList{ Items: []v1.Namespace{*egressNamespace}, }, @@ -1596,11 +2599,8 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, ) - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), + ObjectMeta: newEgressIPMeta("egressip"), Spec: egressipv1.EgressIPSpec{ EgressIPs: []string{ egressIP.String(), @@ -1609,17 +2609,11 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { MatchLabels: egressPodLabel, }, NamespaceSelector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - "name": egressNamespace.Name, - }, + MatchLabels: egressPodLabel, }, }, } - i, n, _ := net.ParseCIDR(podV6IP + "/23") - n.IP = i - fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - err := fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIPPods() @@ -1630,112 +2624,418 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.patchEgressIPObj(node2Name, egressIP.String()) + 
gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) - expectedNatLogicalPort := "k8s-node2" - expectedDatabaseState := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv6, - ExternalIDs: map[string]string{ - "name": eIP.Name, - }, - UUID: "reroute-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"reroute-UUID"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, - Networks: []string{nodeLogicalRouterIfAddrV6}, - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID", - LogicalIP: podV6IP, - ExternalIP: egressIP.String(), + egressIPs, nodes := getEgressIPStatus(eIP.Name) + gomega.Expect(nodes[0]).To(gomega.Equal(node2Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) + + namespaceUpdate := newNamespace(namespace) + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Namespaces().Update(context.TODO(), namespaceUpdate, metav1.UpdateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) + ginkgo.Context("on EgressIP UPDATE", func() { + + ginkgotable.DescribeTable("should update OVN on EgressIP .spec.egressips change", + func(interconnect bool, node1Zone, node2Zone string) { + config.OVNKubernetesFeature.EnableInterconnect = interconnect + app.Action = func(ctx *cli.Context) error { + + egressIP1 := "192.168.126.101" + egressIP2 := "192.168.126.102" + egressIP3 := "192.168.126.103" + node1IPv4 := "192.168.126.202/24" + node2IPv4 := "192.168.126.51/24" + + egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) + egressNamespace := newNamespace(namespace) + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/node-transit-switch-port-ifaddr": "{\"ipv4\":\"168.254.0.2/16\"}", // used only for ic=true test + "k8s.ovn.org/zone-name": node1Zone, // used only for ic=true test + } + labels := map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + node1 := getNodeObj(node1Name, annotations, labels) + annotations = map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/node-transit-switch-port-ifaddr": "{\"ipv4\":\"168.254.0.3/16\"}", // used only for ic=true test + "k8s.ovn.org/zone-name": node2Zone, // used only for ic=true test + } + node2 := getNodeObj(node2Name, annotations, labels) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP1, egressIP2}, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: egressipv1.EgressIPStatus{ 
+ Items: []egressipv1.EgressIPStatusItem{}, + }, + } + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{"100.64.0.3/29"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, + }, + }, + }, + &v1.NodeList{ + Items: []v1.Node{node1, node2}, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{*egressNamespace}, + }, + &v1.PodList{ + Items: []v1.Pod{egressPod}, + }) + + i, n, _ := net.ParseCIDR(podV4IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // NOTE: Cluster manager is the one who patches the egressIP object. + // For the sake of unit testing egressip zone controller we need to patch egressIP object manually + // There are tests in cluster-manager package covering the patch logic. 
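+ // The hand-built status below stands in for that patch: one status item per egress IP in the spec, each paired with the node it is expected to be assigned to, and handed to patchReplaceEgressIPStatus.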
+ status := []egressipv1.EgressIPStatusItem{ + { + Node: node1Name, + EgressIP: egressIP1, + }, + { + Node: node2Name, + EgressIP: egressIP2, + }, + } + err = fakeOvn.controller.patchReplaceEgressIPStatus(eIP.Name, status) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(2)) + egressIPs, nodes := getEgressIPStatus(eIP.Name) + assignmentNode1, assignmentNode2 := nodes[0], nodes[1] + assignedEgressIP1, assignedEgressIP2 := egressIPs[0], egressIPs[1] + + expectedNatLogicalPort1 := fmt.Sprintf("k8s-%s", assignmentNode1) + expectedNatLogicalPort2 := fmt.Sprintf("k8s-%s", assignmentNode2) + natEIP1 := &nbdb.NAT{ + UUID: "egressip-nat-1-UUID", + LogicalIP: podV4IP, + ExternalIP: assignedEgressIP1, ExternalIDs: map[string]string{ "name": egressIPName, }, Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort, + LogicalPort: &expectedNatLogicalPort1, Options: map[string]string{ "stateless": "false", }, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", - Nat: []string{"egressip-nat-UUID"}, - }, - } - - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - egressIPs, nodes := getEgressIPStatus(eIP.Name) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) + } + natEIP2 := &nbdb.NAT{ + UUID: "egressip-nat-2-UUID", + LogicalIP: podV4IP, + ExternalIP: assignedEgressIP2, + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort2, + Options: map[string]string{ + "stateless": "false", + }, + } + expectedDatabaseState := []libovsdbtest.TestData{ + getReRoutePolicy(egressPod.Status.PodIP, "4", []string{"100.64.0.2", "100.64.0.3"}), + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "default-no-reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + assignmentNode1, + UUID: ovntypes.GWRouterPrefix + assignmentNode1 + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + assignmentNode2, + UUID: ovntypes.GWRouterPrefix + assignmentNode2 + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{"100.64.0.3/29"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + 
types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, + }, + } + if !interconnect || node1Zone == "global" { + expectedDatabaseState[8].(*nbdb.LogicalSwitchPort).Options["nat-addresses"] = "router" + expectedDatabaseState[8].(*nbdb.LogicalSwitchPort).Options["exclude-lb-vips-from-garp"] = "true" + expectedDatabaseState[3].(*nbdb.LogicalRouter).Nat = []string{"egressip-nat-1-UUID"} + expectedDatabaseState = append(expectedDatabaseState, natEIP1) + } + if !interconnect || node2Zone == "global" { + expectedDatabaseState[9].(*nbdb.LogicalSwitchPort).Options["nat-addresses"] = "router" + expectedDatabaseState[9].(*nbdb.LogicalSwitchPort).Options["exclude-lb-vips-from-garp"] = "true" + expectedDatabaseState[4].(*nbdb.LogicalRouter).Nat = []string{"egressip-nat-2-UUID"} + expectedDatabaseState = append(expectedDatabaseState, natEIP2) + } + if node2Zone != node1Zone && node2Zone == "remote" { + // the policy reroute will have its second nexthop as transit switchIP + // so the one with join switchIP is where podNode == egressNode and one with transitIP is where podNode != egressNode + expectedDatabaseState[0].(*nbdb.LogicalRouterPolicy).Nexthops = []string{"100.64.0.2", "168.254.0.3"} + } + if node2Zone != node1Zone && node1Zone == "remote" { + expectedDatabaseState = append(expectedDatabaseState, getReRouteStaticRoute(egressPod.Status.PodIP, "100.64.0.3")) + expectedDatabaseState[5].(*nbdb.LogicalRouter).Policies = []string{"default-no-reroute-UUID", "no-reroute-service-UUID"} + expectedDatabaseState[5].(*nbdb.LogicalRouter).StaticRoutes = []string{"reroute-static-route-UUID"} + expectedDatabaseState = expectedDatabaseState[1:] // policy is not visible since podNode is remote + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - podUpdate := newPod(namespace, podName, node1Name, podV6IP) + latest, err := fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP.Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + latest.Spec.EgressIPs = []string{egressIP3, egressIP2} + _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Update(context.TODO(), latest, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Update(context.TODO(), podUpdate, metav1.UpdateOptions{}) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + // NOTE: Cluster manager is the one who patches the egressIP object. + // For the sake of unit testing egressip zone controller we need to patch egressIP object manually + // There are tests in cluster-manager package covering the patch logic. 
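+ // Same approach after the spec update: the spec now lists egressIP3 and egressIP2, so the status is re-patched below with the new IP-to-node pairing before the expected database state is re-checked.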
+ status = []egressipv1.EgressIPStatusItem{ + { + Node: node1Name, + EgressIP: egressIP3, + }, + { + Node: node2Name, + EgressIP: egressIP2, + }, + } + err = fakeOvn.controller.patchReplaceEgressIPStatus(eIP.Name, status) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - expectedDatabaseState = []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, - Networks: []string{nodeLogicalRouterIfAddrV6}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", - Nat: nil, - }, + gomega.Eventually(func() []string { + egressIPs, _ = getEgressIPStatus(eIP.Name) + return egressIPs + }).Should(gomega.ConsistOf(egressIP3, egressIP2)) + + egressIPs, nodes = getEgressIPStatus(eIP.Name) + assignmentNode1, assignmentNode2 = nodes[0], nodes[1] + assignedEgressIP1, assignedEgressIP2 = egressIPs[0], egressIPs[1] + + expectedNatLogicalPort1 = fmt.Sprintf("k8s-%s", assignmentNode1) + expectedNatLogicalPort2 = fmt.Sprintf("k8s-%s", assignmentNode2) + expectedDatabaseState = []libovsdbtest.TestData{ + getReRoutePolicy(egressPod.Status.PodIP, "4", []string{"100.64.0.2", "100.64.0.3"}), + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "default-no-reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + assignmentNode1, + UUID: ovntypes.GWRouterPrefix + assignmentNode1 + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + assignmentNode2, + UUID: ovntypes.GWRouterPrefix + assignmentNode2 + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{"100.64.0.3/29"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, 
+ Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, + }, + } + if !interconnect || node1Zone == "global" { + expectedDatabaseState[8].(*nbdb.LogicalSwitchPort).Options["nat-addresses"] = "router" + expectedDatabaseState[8].(*nbdb.LogicalSwitchPort).Options["exclude-lb-vips-from-garp"] = "true" + expectedDatabaseState[3].(*nbdb.LogicalRouter).Nat = []string{"egressip-nat-1-UUID"} + natEIP1.ExternalIP = assignedEgressIP1 + expectedDatabaseState = append(expectedDatabaseState, natEIP1) + } + if !interconnect || node2Zone == "global" { + expectedDatabaseState[9].(*nbdb.LogicalSwitchPort).Options["nat-addresses"] = "router" + expectedDatabaseState[9].(*nbdb.LogicalSwitchPort).Options["exclude-lb-vips-from-garp"] = "true" + expectedDatabaseState[4].(*nbdb.LogicalRouter).Nat = []string{"egressip-nat-2-UUID"} + natEIP2.ExternalIP = assignedEgressIP2 + expectedDatabaseState = append(expectedDatabaseState, natEIP2) + } + if node2Zone != node1Zone && node2Zone == "remote" { + // the policy reroute will have its second nexthop as transit switchIP + // so the one with join switchIP is where podNode == egressNode and one with transitIP is where podNode != egressNode + expectedDatabaseState[0].(*nbdb.LogicalRouterPolicy).Nexthops = []string{"100.64.0.2", "168.254.0.3"} + } + if node2Zone != node1Zone && node1Zone == "remote" { + expectedDatabaseState = append(expectedDatabaseState, getReRouteStaticRoute(egressPod.Status.PodIP, "100.64.0.3")) + expectedDatabaseState[5].(*nbdb.LogicalRouter).Policies = []string{"default-no-reroute-UUID", "no-reroute-service-UUID"} + expectedDatabaseState[5].(*nbdb.LogicalRouter).StaticRoutes = []string{"reroute-static-route-UUID"} + expectedDatabaseState = expectedDatabaseState[1:] // policy is not visible since podNode is remote + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + return nil } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - return nil - } + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + ginkgotable.Entry("interconnect disabled; non-ic - single zone setup", false, "global", "global"), + ginkgotable.Entry("interconnect enabled; node1 and node2 in global zones", true, "global", "global"), + // will showcase localzone setup - master is in pod's zone where pod's reroute policy towards egressNode will be done. + // NOTE: SNAT won't be visible because it's in remote zone + ginkgotable.Entry("interconnect enabled; node1 in global and node2 in remote zones", true, "global", "remote"), + // will showcase localzone setup - master is in egress node's zone where pod's SNAT policy and static route will be done.
+ // NOTE: reroute policy won't be visible because it's in remote zone (pod is in remote zone) + ginkgotable.Entry("interconnect enabled; node1 in remote and node2 in global zones", true, "remote", "global"), + ) - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - ginkgo.It("egressIP pod retry should remove OVN pod egress setup when EgressIP stops matching pod label", func() { + ginkgo.It("should delete and re-create", func() { app.Action = func(ctx *cli.Context) error { egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") + updatedEgressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8ffd") egressPod := *newPodWithLabels(namespace, podName, node1Name, podV6IP, egressPodLabel) - egressNamespace := newNamespace(namespace) - - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) + egressNamespace := newNamespaceWithLabels(namespace, egressPodLabel) fakeOvn.startWithDBSetup( libovsdbtest.TestSetup{ NBData: []libovsdbtest.TestData{ &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, Networks: []string{nodeLogicalRouterIfAddrV6}, }, &nbdb.LogicalRouter{ @@ -1743,12 +3043,12 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { UUID: ovntypes.OVNClusterRouter + "-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", Nat: nil, }, }, @@ -1761,8 +3061,13 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, ) - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 + i, n, _ := net.ParseCIDR(podV6IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + // hack pod to be in the provided zone + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, true) + fakeOvn.controller.eIPC.nodeZoneState.Store(node2Name, true) eIP := egressipv1.EgressIP{ ObjectMeta: newEgressIPMeta(egressIPName), @@ -1774,17 +3079,11 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { MatchLabels: egressPodLabel, }, NamespaceSelector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - "name": egressNamespace.Name, - }, + MatchLabels: egressPodLabel, }, }, } - i, n, _ := net.ParseCIDR(podV6IP + "/23") - n.IP = i - fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - err := fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err =
fakeOvn.controller.WatchEgressIPPods() @@ -1795,6 +3094,8 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.patchEgressIPObj(node2Name, egressIP.String()) + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) expectedNatLogicalPort := "k8s-node2" @@ -1815,8 +3116,8 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Policies: []string{"reroute-UUID"}, }, &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, Networks: []string{nodeLogicalRouterIfAddrV6}, }, &nbdb.NAT{ @@ -1833,55 +3134,47 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", Nat: []string{"egressip-nat-UUID"}, }, } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) egressIPs, nodes := getEgressIPStatus(eIP.Name) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) + gomega.Expect(nodes[0]).To(gomega.Equal(node2Name)) gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) - podUpdate := newPod(namespace, podName, node1Name, podV6IP) - ginkgo.By("Bringing down NBDB") - // inject transient problem, nbdb is down - fakeOvn.controller.nbClient.Close() - gomega.Eventually(func() bool { - return fakeOvn.controller.nbClient.Connected() - }).Should(gomega.BeFalse()) - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Update(context.TODO(), podUpdate, metav1.UpdateOptions{}) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) - time.Sleep(types.OVSDBTimeout + time.Second) - // check to see if the retry cache has an entry - var key string - key, err = retry.GetResourceKey(podUpdate) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) + eIPUpdate, err := fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP.Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ginkgo.By("retry entry: new obj should not be nil, config should not be nil") - retry.CheckRetryObjectMultipleFieldsEventually( - key, - fakeOvn.controller.retryEgressIPPods, - gomega.BeNil(), // oldObj should be nil - gomega.Not(gomega.BeNil()), // newObj should not be nil - gomega.Not(gomega.BeNil()), // config should not be nil - ) + eIPUpdate.Spec = egressipv1.EgressIPSpec{ + EgressIPs: []string{ + updatedEgressIP.String(), + }, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + } - connCtx, cancel := context.WithTimeout(context.Background(), types.OVSDBTimeout) - defer cancel() - resetNBClient(connCtx, fakeOvn.controller.nbClient) + _, err = 
fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Update(context.TODO(), eIPUpdate, metav1.UpdateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + fakeOvn.patchEgressIPObj(node2Name, updatedEgressIP.String()) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - retry.SetRetryObjWithNoBackoff(key, fakeOvn.controller.retryEgressIPPods) - fakeOvn.controller.retryEgressIPPods.RequestRetryObjs() - // check the cache no longer has the entry - retry.CheckRetryObjectEventually(key, false, fakeOvn.controller.retryEgressIPPods) + gomega.Eventually(func() []string { + egressIPs, _ = getEgressIPStatus(eIP.Name) + return egressIPs + }).Should(gomega.ContainElement(updatedEgressIP.String())) + gomega.Expect(nodes[0]).To(gomega.Equal(node2Name)) return nil } @@ -1889,145 +3182,123 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - ginkgo.It("should not treat pod update if pod already had assigned IP when it got the ADD", func() { - app.Action = func(ctx *cli.Context) error { - - egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") + }) - egressPod := *newPodWithLabels(namespace, podName, node1Name, podV6IP, egressPodLabel) - egressNamespace := newNamespace(namespace) + ginkgo.Context("WatchEgressNodes", func() { - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) + ginkgo.It("should populated egress node data as they are tagged `egress assignable` with variants of IPv4/IPv6", func() { + app.Action = func(ctx *cli.Context) error { - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, - Networks: []string{nodeLogicalRouterIfAddrV6}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", - }, - }, - }, - &v1.NamespaceList{ - Items: []v1.Namespace{*egressNamespace}, - }, - &v1.PodList{ - Items: []v1.Pod{egressPod}, - }, - ) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 + node1IPv4 := "192.168.128.202/24" + node1IPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" + node2IPv4 := "192.168.126.51/24" - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{ - egressIP.String(), + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, node1IPv6), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), + } + node1 := getNodeObj(node1Name, annotations, map[string]string{}) + annotations = map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": 
fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + } + node2 := getNodeObj(node2Name, annotations, map[string]string{}) + fakeOvn.startWithDBSetup(libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", }, - PodSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", }, - NamespaceSelector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - "name": egressNamespace.Name, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, }, }, }, + }) + err := fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + node1.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", } - i, n, _ := net.ParseCIDR(podV6IP + "/23") - n.IP = i - fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - - err := fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node1, metav1.CreateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + node2.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } - gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node2, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - expectedNatLogicalPort := "k8s-node2" expectedDatabaseState := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv6, - ExternalIDs: map[string]string{ - "name": eIP.Name, - }, - UUID: "reroute-UUID", - }, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"reroute-UUID"}, + Policies: []string{"reroute-UUID", "no-reroute-service-UUID"}, }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, - Networks: []string{nodeLogicalRouterIfAddrV6}, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: 
"ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "reroute-UUID", }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID", - LogicalIP: podV6IP, - ExternalIP: egressIP.String(), - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort, - Options: map[string]string{ - "stateless": "false", - }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", - Nat: []string{"egressip-nat-UUID"}, + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, }, } gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - egressIPs, nodes := getEgressIPStatus(eIP.Name) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) - - podUpdate := newPodWithLabels(namespace, podName, node1Name, podV6IP, map[string]string{ - "egress": "needed", - "some": "update", - }) - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Update(context.TODO(), podUpdate, metav1.UpdateOptions{}) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) - - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - return nil } @@ -2035,139 +3306,105 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - ginkgo.It("should treat pod update if pod did not have an assigned IP when it got the ADD", func() { + ginkgo.It("using retry to create egress node with forced error followed by an update", func() { app.Action = func(ctx *cli.Context) error { - - egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") - - egressPod := *newPodWithLabels(namespace, podName, node1Name, "", egressPodLabel) - egressNamespace := newNamespace(namespace) - - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, 
map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) - - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, - Networks: []string{nodeLogicalRouterIfAddrV6}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", - Nat: nil, - }, - }, - }, - &v1.NamespaceList{ - Items: []v1.Namespace{*egressNamespace}, - }, - &v1.PodList{ - Items: []v1.Pod{egressPod}, - }, - ) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{ - egressIP.String(), + nodeIPv4 := "192.168.126.51/24" + nodeIPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", nodeIPv4, nodeIPv6), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), + } + node := getNodeObj("node", annotations, map[string]string{}) + fakeOvn.startWithDBSetup(libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", }, - PodSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node.Name, + UUID: ovntypes.GWRouterPrefix + node.Name + "-UUID", }, - NamespaceSelector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - "name": egressNamespace.Name, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + nodeName + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + nodeName, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + nodeName, }, }, }, - } - - err := fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + }) + err := fakeOvn.controller.WatchEgressNodes() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() + node.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node, metav1.CreateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ginkgo.By("Bringing down NBDB") + // inject transient problem, nbdb is down + fakeOvn.controller.nbClient.Close() + gomega.Eventually(func() bool { + return fakeOvn.controller.nbClient.Connected() + }).Should(gomega.BeFalse()) - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) + // sleep long enough for TransactWithRetry to fail, causing egressnode operations to fail + // there is a chance that both egressnode events(node1 removal and node2 update) will end up in the same event queue + // sleep for double the time to allow for two 
consecutive TransactWithRetry timeouts + time.Sleep(2 * (types.OVSDBTimeout + time.Second)) + // check to see if the retry cache has an entry + key, err := retry.GetResourceKey(&node) gomega.Expect(err).NotTo(gomega.HaveOccurred()) + retry.CheckRetryObjectEventually(key, true, fakeOvn.controller.retryEgressNodes) + ginkgo.By("retry entry: old obj should be nil, new obj should not be nil") + retry.CheckRetryObjectMultipleFieldsEventually( + key, + fakeOvn.controller.retryEgressNodes, + gomega.BeNil(), // oldObj should be nil + gomega.Not(gomega.BeNil()), // newObj should not be nil + ) - gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) - - egressIPs, nodes := getEgressIPStatus(eIP.Name) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) - - podUpdate := newPodWithLabels(namespace, podName, node1Name, podV6IP, egressPodLabel) - podUpdate.Annotations = map[string]string{ - "k8s.ovn.org/pod-networks": fmt.Sprintf("{\"default\":{\"ip_addresses\":[\"%s/23\"],\"mac_address\":\"0a:58:0a:83:00:0f\",\"gateway_ips\":[\"%s\"],\"ip_address\":\"%s/23\",\"gateway_ip\":\"%s\"}}", podV6IP, v6GatewayIP, podV6IP, v6GatewayIP), - } - i, n, _ := net.ParseCIDR(podV6IP + "/23") - n.IP = i - fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Update(context.TODO(), podUpdate, metav1.UpdateOptions{}) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + node.Labels = map[string]string{} + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + connCtx, cancel := context.WithTimeout(context.Background(), types.OVSDBTimeout) + defer cancel() + resetNBClient(connCtx, fakeOvn.controller.nbClient) + retry.SetRetryObjWithNoBackoff(key, fakeOvn.controller.retryEgressNodes) + fakeOvn.controller.retryEgressNodes.RequestRetryObjs() + // check the cache no longer has the entry + retry.CheckRetryObjectEventually(key, false, fakeOvn.controller.retryEgressNodes) - expectedNatLogicalPort := "k8s-node2" expectedDatabaseState := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip6.src == %s", podV6IP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv6, - ExternalIDs: map[string]string{ - "name": eIP.Name, - }, - UUID: "reroute-UUID", - }, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"reroute-UUID"}, + Policies: []string{"reroute-UUID", "no-reroute-service-UUID"}, }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, - Networks: []string{nodeLogicalRouterIfAddrV6}, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "reroute-UUID", }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID", - LogicalIP: podV6IP, - ExternalIP: egressIP.String(), - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort, - Options: 
map[string]string{ - "stateless": "false", - }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", + Name: ovntypes.GWRouterPrefix + node.Name, + UUID: ovntypes.GWRouterPrefix + node.Name + "-UUID", }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", - Nat: []string{"egressip-nat-UUID"}, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + nodeName + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + nodeName, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + nodeName, + }, }, } gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) @@ -2178,33 +3415,31 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - ginkgo.It("should not treat pod DELETE if pod did not have an assigned IP when it got the ADD and we receive a DELETE before the IP UPDATE", func() { + ginkgo.It("egressIP pod recreate with same name (stateful-sets) shouldn't use stale logicalPortCache entries", func() { app.Action = func(ctx *cli.Context) error { - egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") + config.Gateway.DisableSNATMultipleGWs = true - egressPod := *newPodWithLabels(namespace, podName, node1Name, "", egressPodLabel) - egressNamespace := newNamespace(namespace) - fakeOvn.startWithDBSetup(clusterRouterDbSetup, - &v1.NamespaceList{ - Items: []v1.Namespace{*egressNamespace}, - }, - &v1.PodList{ - Items: []v1.Pod{egressPod}, - }, - ) - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) + egressIP1 := "192.168.126.101" + node1IPv4 := "192.168.126.12/24" - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 + egressPod1 := *newPodWithLabels(namespace, podName, node1Name, "", egressPodLabel) + egressNamespace := newNamespace(namespace) + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, + "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", + } + labels := map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + node1 := getNodeObj(node1Name, annotations, labels) eIP := egressipv1.EgressIP{ ObjectMeta: newEgressIPMeta(egressIPName), Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{ - egressIP.String(), - }, + EgressIPs: []string{egressIP1}, PodSelector: metav1.LabelSelector{ MatchLabels: egressPodLabel, }, @@ -2214,188 +3449,232 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, }, }, + Status: egressipv1.EgressIPStatus{ + Items: 
[]egressipv1.EgressIPStatusItem{ + { + Node: node1.Name, + EgressIP: egressIP1, + }, + }, + }, + } + nodeSwitch := &nbdb.LogicalSwitch{ + UUID: node1.Name + "-UUID", + Name: node1.Name, } - - err := fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) - - egressIPs, nodes := getEgressIPStatus(eIP.Name) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) - - err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Delete(context.TODO(), egressPod.Name, *metav1.NewDeleteOptions(0)) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - }) - - ginkgo.Context("IPv6 on namespace UPDATE", func() { - - ginkgo.It("should remove OVN pod egress setup when EgressIP is deleted", func() { - app.Action = func(ctx *cli.Context) error { - - egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") - - egressPod := *newPodWithLabels(namespace, podName, node1Name, podV6IP, egressPodLabel) - egressNamespace := newNamespaceWithLabels(namespace, egressPodLabel) - - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) fakeOvn.startWithDBSetup( libovsdbtest.TestSetup{ NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, - Networks: []string{nodeLogicalRouterIfAddrV6}, - }, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", - Nat: nil, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, }, + nodeSwitch, }, }, + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1}, + }, &v1.NamespaceList{ Items: 
[]v1.Namespace{*egressNamespace}, }, &v1.PodList{ - Items: []v1.Pod{egressPod}, + Items: []v1.Pod{egressPod1}, }, ) + // we don't know the real switch UUID in the db, but it can be found by name + swUUID := getLogicalSwitchUUID(fakeOvn.controller.nbClient, node1.Name) + fakeOvn.controller.lsManager.AddSwitch(node1.Name, swUUID, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{ - egressIP.String(), - }, - PodSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, - }, - NamespaceSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, - }, - }, - } - - i, n, _ := net.ParseCIDR(podV6IP + "/23") - n.IP = i - fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) + egressPodPortInfo, err := fakeOvn.controller.logicalPortCache.get(&egressPod1, types.DefaultNetworkName) gomega.Expect(err).NotTo(gomega.HaveOccurred()) + egressPodIP, _, err := net.ParseCIDR(egressPodPortInfo.ips[0].String()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(egressPodPortInfo.expires.IsZero()).To(gomega.BeTrue()) + podAddr := fmt.Sprintf("%s %s", egressPodPortInfo.mac.String(), egressPodIP) - gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) - - expectedNatLogicalPort := "k8s-node2" - expectedDatabaseState := []libovsdbtest.TestData{ + expectedNatLogicalPort1 := "k8s-node1" + expectedDatabaseStatewithPod := []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, &nbdb.LogicalRouterPolicy{ Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), + Match: fmt.Sprintf("ip4.src == %s", egressPodIP), Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv6, + Nexthops: nodeLogicalRouterIPv4, ExternalIDs: map[string]string{ "name": eIP.Name, }, - UUID: "reroute-UUID", + UUID: "reroute-UUID1", }, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"reroute-UUID"}, + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1"}, }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + 
"-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, - Networks: []string{nodeLogicalRouterIfAddrV6}, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Nat: []string{"egressip-nat-UUID1"}, }, &nbdb.NAT{ - UUID: "egressip-nat-UUID", - LogicalIP: podV6IP, - ExternalIP: egressIP.String(), + UUID: "egressip-nat-UUID1", + LogicalIP: egressPodIP.String(), + ExternalIP: egressIP1, ExternalIDs: map[string]string{ "name": egressIPName, }, Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort, + LogicalPort: &expectedNatLogicalPort1, Options: map[string]string{ "stateless": "false", }, }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", - Nat: []string{"egressip-nat-UUID"}, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - egressIPs, nodes := getEgressIPStatus(eIP.Name) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, + }, + nodeSwitch, + } + podLSP := &nbdb.LogicalSwitchPort{ + UUID: util.GetLogicalPortName(egressPod1.Namespace, egressPod1.Name) + "-UUID", + Name: util.GetLogicalPortName(egressPod1.Namespace, egressPod1.Name), + Addresses: []string{podAddr}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": egressPod1.Namespace, + }, + Options: map[string]string{ + "requested-chassis": egressPod1.Spec.NodeName, + "iface-id-ver": egressPod1.Name, + }, + PortSecurity: []string{podAddr}, + } + nodeSwitch.Ports = []string{podLSP.UUID} + finalDatabaseStatewithPod := append(expectedDatabaseStatewithPod, podLSP) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + _, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) - err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Delete(context.TODO(), eIP.Name, metav1.DeleteOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod)) - expectedDatabaseState = []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", + // delete the pod + err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod1.Namespace).Delete(context.TODO(), + egressPod1.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + expectedDatabaseStateWithoutPod := []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: 
nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, - Networks: []string{nodeLogicalRouterIfAddrV6}, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", - Nat: nil, + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Nat: []string{}, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, + }, + &nbdb.LogicalSwitch{ + UUID: node1.Name + "-UUID", + Name: node1.Name, + Ports: []string{}, }, } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStateWithoutPod)) + // recreate pod with same name immediately; simulating handler race (pods v/s egressip) condition, + // so instead of proper pod create, we try out egressIP pod setup which will be a no-op since pod doesn't exist + ginkgo.By("should not add egress IP setup for a deleted pod whose entry exists in logicalPortCache") + err = fakeOvn.controller.addPodEgressIPAssignments(egressIPName, eIP.Status.Items, &egressPod1) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // pod is gone but logicalPortCache holds the entry for 60seconds + egressPodPortInfo, err = fakeOvn.controller.logicalPortCache.get(&egressPod1, types.DefaultNetworkName) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(egressPodPortInfo.expires.IsZero()).To(gomega.BeFalse()) + staleEgressPodIP, _, err := net.ParseCIDR(egressPodPortInfo.ips[0].String()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(staleEgressPodIP).To(gomega.Equal(egressPodIP)) + // no-op + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStateWithoutPod)) + return nil } @@ -2403,178 +3682,301 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - ginkgo.It("egressIP retry should remove OVN pod egress setup when EgressIP is deleted", func() { + ginkgo.It("egressIP pod recreate with same name (stateful-sets) shouldn't use stale logicalPortCache entries AND stale podAssignment cache entries", func() { app.Action = func(ctx 
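+		// With the pod deleted, its reroute policy, SNAT and switch port must
+		// be cleaned up; only the default no-reroute policies remain.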
+		expectedDatabaseStateWithoutPod := []libovsdbtest.TestData{
+			&nbdb.LogicalRouterPolicy{
+				Priority: types.DefaultNoRereoutePriority,
+				Match:    "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14",
+				Action:   nbdb.LogicalRouterPolicyActionAllow,
+				UUID:     "no-reroute-UUID",
 			},
-			&nbdb.LogicalRouterPort{
-				UUID:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID",
-				Name:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name,
-				Networks: []string{nodeLogicalRouterIfAddrV6},
+			&nbdb.LogicalRouterPolicy{
+				Priority: types.DefaultNoRereoutePriority,
+				Match:    fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet),
+				Action:   nbdb.LogicalRouterPolicyActionAllow,
+				UUID:     "no-reroute-service-UUID",
 			},
 			&nbdb.LogicalRouter{
-				Name: ovntypes.GWRouterPrefix + node1.name,
-				UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID",
+				Name:     ovntypes.OVNClusterRouter,
+				UUID:     ovntypes.OVNClusterRouter + "-UUID",
+				Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"},
 			},
 			&nbdb.LogicalRouter{
-				Name: ovntypes.GWRouterPrefix + node2.name,
-				UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID",
-				Nat:  nil,
+				Name: ovntypes.GWRouterPrefix + node1.Name,
+				UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID",
+				Nat:  []string{},
+			},
+			&nbdb.LogicalSwitchPort{
+				UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID",
+				Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name,
+				Type: "router",
+				Options: map[string]string{
+					"router-port":               types.GWRouterToExtSwitchPrefix + "GR_" + node1Name,
+					"nat-addresses":             "router",
+					"exclude-lb-vips-from-garp": "true",
+				},
+			},
+			&nbdb.LogicalRouterPort{
+				UUID:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID",
+				Name:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name,
+				Networks: []string{"100.64.0.2/29"},
+			},
+			&nbdb.LogicalSwitch{
+				UUID:  node1.Name + "-UUID",
+				Name:  node1.Name,
+				Ports: []string{},
 			},
 		}
-		gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState))
+		gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStateWithoutPod))
+		// recreate pod with same name immediately; simulating handler race (pods vs egressip) condition,
+		// so instead of a proper pod create, we try out egressIP pod setup which will be a no-op since pod doesn't exist
+		ginkgo.By("should not add egress IP setup for a deleted pod whose entry exists in logicalPortCache")
+		err = fakeOvn.controller.addPodEgressIPAssignments(egressIPName, eIP.Status.Items, &egressPod1)
+		gomega.Expect(err).NotTo(gomega.HaveOccurred())
+		// pod is gone but logicalPortCache holds the entry for 60 seconds
+		egressPodPortInfo, err = fakeOvn.controller.logicalPortCache.get(&egressPod1, types.DefaultNetworkName)
+		gomega.Expect(err).NotTo(gomega.HaveOccurred())
+		gomega.Expect(egressPodPortInfo.expires.IsZero()).To(gomega.BeFalse())
+		staleEgressPodIP, _, err := net.ParseCIDR(egressPodPortInfo.ips[0].String())
+		gomega.Expect(err).NotTo(gomega.HaveOccurred())
+		gomega.Expect(staleEgressPodIP).To(gomega.Equal(egressPodIP))
+		// no-op
+		gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStateWithoutPod))

 		return nil
 	}
@@ -2403,178 +3682,301 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() {
 		gomega.Expect(err).NotTo(gomega.HaveOccurred())
 	})

-	ginkgo.It("egressIP retry should remove OVN pod egress setup when EgressIP is deleted", func() {
+	ginkgo.It("egressIP pod recreate with same name (stateful-sets) shouldn't use stale logicalPortCache entries AND stale podAssignment cache entries", func() {
 		app.Action = func(ctx *cli.Context) error {

-			egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d")
+			config.Gateway.DisableSNATMultipleGWs = true

-			egressPod := *newPodWithLabels(namespace, podName, node1Name, podV6IP, egressPodLabel)
-			egressNamespace := newNamespaceWithLabels(namespace, egressPodLabel)
+			egressIP1 := "192.168.126.101"
+			node1IPv4 := "192.168.126.12/24"
+
+			oldEgressPodIP := "10.128.0.50"
+			egressPod1 := newPodWithLabels(namespace, podName, node1Name, "", egressPodLabel)
+			oldAnnotation := map[string]string{"k8s.ovn.org/pod-networks": `{"default":{"ip_addresses":["10.128.0.50/24"],"mac_address":"0a:58:0a:80:00:05","gateway_ips":["10.128.0.1"],"routes":[{"dest":"10.128.0.0/24","nextHop":"10.128.0.1"}],"ip_address":"10.128.0.50/24","gateway_ip":"10.128.0.1"}}`}
+			egressPod1.Annotations = oldAnnotation
+			egressNamespace := newNamespace(namespace)
+
+			annotations := map[string]string{
+				"k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4),
+				"k8s.ovn.org/node-subnets":        fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet),
+				"k8s.ovn.org/l3-gateway-config":   `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`,
+				"k8s.ovn.org/node-chassis-id":     "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec",
+			}
+			labels := map[string]string{
+				"k8s.ovn.org/egress-assignable": "",
+			}
+			node1 := getNodeObj(node1Name, annotations, labels)

-			node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"})
-			node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"})
+			eIP := egressipv1.EgressIP{
+				ObjectMeta: newEgressIPMeta(egressIPName),
+				Spec: egressipv1.EgressIPSpec{
+					EgressIPs: []string{egressIP1},
+					PodSelector: metav1.LabelSelector{
+						MatchLabels: egressPodLabel,
+					},
+					NamespaceSelector: metav1.LabelSelector{
+						MatchLabels: map[string]string{
+							"name": egressNamespace.Name,
+						},
+					},
+				},
+				Status: egressipv1.EgressIPStatus{
+					Items: []egressipv1.EgressIPStatusItem{
+						{
+							Node:     node1.Name,
+							EgressIP: egressIP1,
+						},
+					},
+				},
+			}
+			nodeSwitch := &nbdb.LogicalSwitch{
+				UUID: node1.Name + "-UUID",
+				Name: node1.Name,
+			}

 			fakeOvn.startWithDBSetup(
 				libovsdbtest.TestSetup{
 					NBData: []libovsdbtest.TestData{
-						&nbdb.LogicalRouterPort{
-							UUID:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID",
-							Name:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name,
-							Networks: []string{nodeLogicalRouterIfAddrV6},
-						},
 						&nbdb.LogicalRouter{
 							Name: ovntypes.OVNClusterRouter,
 							UUID: ovntypes.OVNClusterRouter + "-UUID",
 						},
 						&nbdb.LogicalRouter{
-							Name: ovntypes.GWRouterPrefix + node1.name,
-							UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID",
+							Name: ovntypes.GWRouterPrefix + node1.Name,
+							UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID",
 						},
-						&nbdb.LogicalRouter{
-							Name: ovntypes.GWRouterPrefix + node2.name,
-							UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID",
-							Nat:  nil,
+						&nbdb.LogicalRouterPort{
+							UUID:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID",
+							Name:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name,
+							Networks: []string{nodeLogicalRouterIfAddrV4},
+						},
+						&nbdb.LogicalSwitchPort{
+							UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID",
+							Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name,
+							Type: "router",
+							Options: map[string]string{
+								"router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name,
+							},
 						},
+						nodeSwitch,
 					},
 				},
+				&egressipv1.EgressIPList{
+					Items: []egressipv1.EgressIP{eIP},
+				},
+				&v1.NodeList{
+					Items: []v1.Node{node1},
+				},
 				&v1.NamespaceList{
 					Items: []v1.Namespace{*egressNamespace},
 				},
 				&v1.PodList{
-					Items: []v1.Pod{egressPod},
+					Items: []v1.Pod{*egressPod1},
 				},
 			)

-			fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1
-			fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2
-
-			eIP := egressipv1.EgressIP{
-				ObjectMeta: newEgressIPMeta(egressIPName),
-				Spec: egressipv1.EgressIPSpec{
-					EgressIPs: []string{
-						egressIP.String(),
-					},
-					PodSelector: metav1.LabelSelector{
-						MatchLabels: egressPodLabel,
-					},
-					NamespaceSelector: metav1.LabelSelector{
-						MatchLabels: egressPodLabel,
-					},
-				},
-			}
-
-			i, n, _ := net.ParseCIDR(podV6IP + "/23")
-			n.IP = i
-			fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n})
-
-			err := fakeOvn.controller.WatchEgressIPNamespaces()
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-			err = fakeOvn.controller.WatchEgressIPPods()
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-			err = fakeOvn.controller.WatchEgressIP()
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-
-			ginkgo.By("Bringing down NBDB")
-			// inject transient problem, nbdb is down
-			fakeOvn.controller.nbClient.Close()
-			gomega.Eventually(func() bool {
-				return fakeOvn.controller.nbClient.Connected()
-			}).Should(gomega.BeFalse())
+			// we don't know the real switch UUID in the db, but it can be found by name
+			swUUID := getLogicalSwitchUUID(fakeOvn.controller.nbClient, node1.Name)
+			fakeOvn.controller.lsManager.AddSwitch(node1.Name, swUUID, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)})
+			fakeOvn.controller.WatchPods()
+			fakeOvn.controller.WatchEgressIPNamespaces()
+			fakeOvn.controller.WatchEgressIPPods()
+			fakeOvn.controller.WatchEgressNodes()
+			fakeOvn.controller.WatchEgressIP()

-			_, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{})
+			oldEgressPodPortInfo, err := fakeOvn.controller.logicalPortCache.get(egressPod1, types.DefaultNetworkName)
 			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-			// sleep long enough for TransactWithRetry to fail, causing egressnode operations to fail
-			time.Sleep(types.OVSDBTimeout + time.Second)
-			// check to see if the retry cache has an entry
-			key, err := retry.GetResourceKey(&eIP)
+			egressPodIP, _, err := net.ParseCIDR(oldEgressPodPortInfo.ips[0].String())
 			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-			retry.CheckRetryObjectEventually(key, true, fakeOvn.controller.retryEgressIPs)
-
-			connCtx, cancel := context.WithTimeout(context.Background(), types.OVSDBTimeout)
-			defer cancel()
-			resetNBClient(connCtx, fakeOvn.controller.nbClient)
-			retry.SetRetryObjWithNoBackoff(key, fakeOvn.controller.retryEgressIPs)
-			fakeOvn.controller.retryEgressIPs.RequestRetryObjs()
-			// check the cache no longer has the entry
-			retry.CheckRetryObjectEventually(key, false, fakeOvn.controller.retryEgressIPs)
-
-			gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1))
+			gomega.Expect(egressPodIP.String()).To(gomega.Equal(oldEgressPodIP))
+			gomega.Expect(oldEgressPodPortInfo.expires.IsZero()).To(gomega.BeTrue())
+			podAddr := fmt.Sprintf("%s %s", oldEgressPodPortInfo.mac.String(), egressPodIP)

-			expectedNatLogicalPort := "k8s-node2"
-			expectedDatabaseState := []libovsdbtest.TestData{
-				&nbdb.LogicalRouterPolicy{
-					Priority: types.EgressIPReroutePriority,
-					Match:    fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP),
-					Action:   nbdb.LogicalRouterPolicyActionReroute,
-					Nexthops: nodeLogicalRouterIPv6,
-					ExternalIDs: map[string]string{
-						"name": eIP.Name,
-					},
-					UUID: "reroute-UUID",
-				},
-				&nbdb.LogicalRouter{
-					Name: ovntypes.OVNClusterRouter,
-					UUID: ovntypes.OVNClusterRouter + "-UUID",
-					Policies: []string{"reroute-UUID"},
+			expectedNatLogicalPort1 := "k8s-node1"
+			podEIPSNAT := &nbdb.NAT{
+				UUID:       "egressip-nat-UUID1",
+				LogicalIP:  egressPodIP.String(),
+				ExternalIP: egressIP1,
+				ExternalIDs: map[string]string{
+					"name": egressIPName,
 				},
-				&nbdb.LogicalRouterPort{
-					UUID:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID",
-					Name:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name,
-					Networks: []string{nodeLogicalRouterIfAddrV6},
+				Type:        nbdb.NATTypeSNAT,
+				LogicalPort: &expectedNatLogicalPort1,
+				Options: map[string]string{
+					"stateless": "false",
 				},
-				&nbdb.NAT{
-					UUID:       "egressip-nat-UUID",
-					LogicalIP:  podV6IP,
-					ExternalIP: egressIP.String(),
-					ExternalIDs: map[string]string{
-						"name": egressIPName,
-					},
-					Type:        nbdb.NATTypeSNAT,
-					LogicalPort: &expectedNatLogicalPort,
+			}
+			podReRoutePolicy := &nbdb.LogicalRouterPolicy{
+				Priority: types.EgressIPReroutePriority,
+				Match:    fmt.Sprintf("ip4.src == %s", oldEgressPodIP),
+				Action:   nbdb.LogicalRouterPolicyActionReroute,
+				Nexthops: nodeLogicalRouterIPv4,
+				ExternalIDs: map[string]string{
+					"name": eIP.Name,
+				},
+				UUID: "reroute-UUID1",
+			}
+			node1GR := &nbdb.LogicalRouter{
+				Name: ovntypes.GWRouterPrefix + node1.Name,
+				UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID",
+				Nat:  []string{"egressip-nat-UUID1"},
+			}
+			expectedDatabaseStatewithPod := []libovsdbtest.TestData{
+				podEIPSNAT,
+				&nbdb.LogicalRouterPolicy{
+					Priority: types.DefaultNoRereoutePriority,
+					Match:    "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14",
+					Action:   nbdb.LogicalRouterPolicyActionAllow,
+					UUID:     "no-reroute-UUID",
+				},
+				&nbdb.LogicalRouterPolicy{
+					Priority: types.DefaultNoRereoutePriority,
+					Match:    fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet),
+					Action:   nbdb.LogicalRouterPolicyActionAllow,
+					UUID:     "no-reroute-service-UUID",
+				},
+				podReRoutePolicy,
+				&nbdb.LogicalRouter{
+					Name:     ovntypes.OVNClusterRouter,
+					UUID:     ovntypes.OVNClusterRouter + "-UUID",
+					Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1"},
+				},
+				node1GR,
+				&nbdb.LogicalSwitchPort{
+					UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID",
+					Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name,
+					Type: "router",
 					Options: map[string]string{
-						"stateless": "false",
+						"router-port":               types.GWRouterToExtSwitchPrefix + "GR_" + node1Name,
+						"nat-addresses":             "router",
+						"exclude-lb-vips-from-garp": "true",
 					},
 				},
-				&nbdb.LogicalRouter{
-					Name: ovntypes.GWRouterPrefix + node1.name,
-					UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID",
+				&nbdb.LogicalRouterPort{
+					UUID:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID",
+					Name:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name,
+					Networks: []string{"100.64.0.2/29"},
 				},
-				&nbdb.LogicalRouter{
-					Name: ovntypes.GWRouterPrefix + node2.name,
-					UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID",
-					Nat:  []string{"egressip-nat-UUID"},
+				nodeSwitch,
+			}
+			podLSP := &nbdb.LogicalSwitchPort{
+				UUID:      util.GetLogicalPortName(egressPod1.Namespace, egressPod1.Name) + "-UUID",
+				Name:      util.GetLogicalPortName(egressPod1.Namespace, egressPod1.Name),
+				Addresses: []string{podAddr},
+				ExternalIDs: map[string]string{
+					"pod":       "true",
+					"namespace": egressPod1.Namespace,
+				},
+				Options: map[string]string{
+					"requested-chassis": egressPod1.Spec.NodeName,
+					"iface-id-ver":      egressPod1.Name,
 				},
+				PortSecurity: []string{podAddr},
 			}
-			gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState))
+			nodeSwitch.Ports = []string{podLSP.UUID}
+			finalDatabaseStatewithPod := append(expectedDatabaseStatewithPod, podLSP)
+			gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1))
+			_, nodes := getEgressIPStatus(egressIPName)
+			gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name))

-			egressIPs, nodes := getEgressIPStatus(eIP.Name)
-			gomega.Expect(nodes[0]).To(gomega.Equal(node2.name))
-			gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String()))
+			gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod))

-			err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Delete(context.TODO(), eIP.Name, metav1.DeleteOptions{})
+			// delete the pod and simulate a cleanup failure:
+			// 1) create a situation where pod is gone from kapi but egressIP setup wasn't cleaned up due to deletion error
+			//    - we remove annotation from pod to mimic this situation
+			// 2) leaves us with a stale podAssignment cache
+			// 3) check to make sure the logicalPortCache is always used even if podAssignment already has the podKey
+			ginkgo.By("delete the egress IP pod and force the deletion to fail")
+			egressPod1.Annotations = map[string]string{}
+			_, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod1.Namespace).Update(context.TODO(), egressPod1, metav1.UpdateOptions{})
 			gomega.Expect(err).NotTo(gomega.HaveOccurred())
+			// Wait for the cleared annotations to show up client-side
+			gomega.Eventually(func() int {
+				egressPod1, _ = fakeOvn.watcher.GetPod(egressPod1.Namespace, egressPod1.Name)
+				return len(egressPod1.Annotations)
+			}, 5).Should(gomega.Equal(0))

-			expectedDatabaseState = []libovsdbtest.TestData{
-				&nbdb.LogicalRouter{
-					Name: ovntypes.OVNClusterRouter,
-					UUID: ovntypes.OVNClusterRouter + "-UUID",
-				},
-				&nbdb.LogicalRouterPort{
-					UUID:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID",
-					Name:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name,
-					Networks: []string{nodeLogicalRouterIfAddrV6},
-				},
-				&nbdb.LogicalRouter{
-					Name: ovntypes.GWRouterPrefix + node1.name,
-					UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID",
-				},
-				&nbdb.LogicalRouter{
-					Name: ovntypes.GWRouterPrefix + node2.name,
-					UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID",
-					Nat:  nil,
+			// Delete the pod to trigger the cleanup failure
+			err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod1.Namespace).Delete(context.TODO(),
+				egressPod1.Name, metav1.DeleteOptions{})
+			// internally we have an error:
+			// E1006 12:51:59.594899 2500972 obj_retry.go:1517] Failed to delete *factory.egressIPPod egressip-namespace/egress-pod, error: pod egressip-namespace/egress-pod: no pod IPs found
+			gomega.Expect(err).NotTo(gomega.HaveOccurred())
+			// notice that pod objects aren't cleaned up yet since deletion failed!
+			// even the LSP sticks around for 60 seconds
+			gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod))
+			// egressIP cache is stale in the sense the podKey has not been deleted since deletion failed
+			pas := getPodAssignmentState(egressPod1)
+			gomega.Expect(pas).NotTo(gomega.BeNil())
+			gomega.Expect(pas.egressStatuses).To(gomega.Equal(map[egressipv1.EgressIPStatusItem]string{
+				{
+					Node:     "node1",
+					EgressIP: "192.168.126.101",
+				}: "",
+			}))
+			// recreate pod with same name immediately;
+			ginkgo.By("should add egress IP setup for the NEW pod which exists in logicalPortCache")
+			newEgressPodIP := "10.128.0.60"
+			egressPod1 = newPodWithLabels(namespace, podName, node1Name, newEgressPodIP, egressPodLabel)
+			egressPod1.Annotations = map[string]string{"k8s.ovn.org/pod-networks": `{"default":{"ip_addresses":["10.128.0.60/24"],"mac_address":"0a:58:0a:80:00:06","gateway_ips":["10.128.0.1"],"routes":[{"dest":"10.128.0.0/24","nextHop":"10.128.0.1"}],"ip_address":"10.128.0.60/24","gateway_ip":"10.128.0.1"}}`}
+			_, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod1.Namespace).Create(context.TODO(), egressPod1, metav1.CreateOptions{})
+			gomega.Expect(err).NotTo(gomega.HaveOccurred())
+
+			// wait for the logical port cache to get updated with the new pod's IP
+			var newEgressPodPortInfo *lpInfo
+			getEgressPodIP := func() string {
+				newEgressPodPortInfo, err = fakeOvn.controller.logicalPortCache.get(egressPod1, types.DefaultNetworkName)
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())
+				egressPodIP, _, err := net.ParseCIDR(newEgressPodPortInfo.ips[0].String())
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())
+				return egressPodIP.String()
+			}
+			gomega.Eventually(func() string {
+				return getEgressPodIP()
+			}).Should(gomega.Equal(newEgressPodIP))
+			gomega.Expect(newEgressPodPortInfo.expires.IsZero()).To(gomega.BeTrue())
+
+			// deletion for the older EIP pod object is still being retried so we still have SNAT
+			// towards nodeIP for new pod which is created by addLogicalPort.
+			// Note that while we have the stale re-route policy for the old pod, the snat for the old pod towards egressIP is gone
+			// because deleteLogicalPort removes ALL snats for a given pod but doesn't remove the policies.
+			ipv4Addr, _, _ := net.ParseCIDR(node1IPv4)
+			podNodeSNAT := &nbdb.NAT{
+				UUID:       "node-nat-UUID1",
+				LogicalIP:  newEgressPodIP,
+				ExternalIP: ipv4Addr.String(),
+				Type:       nbdb.NATTypeSNAT,
+				Options: map[string]string{
+					"stateless": "false",
 				},
 			}
-			gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState))
+			finalDatabaseStatewithPod = append(finalDatabaseStatewithPod, podNodeSNAT)
+			node1GR.Nat = []string{podNodeSNAT.UUID}
+			podAddr = fmt.Sprintf("%s %s", newEgressPodPortInfo.mac.String(), newEgressPodIP)
+			podLSP.PortSecurity = []string{podAddr}
+			podLSP.Addresses = []string{podAddr}
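+			// finalDatabaseStatewithPod[1:] drops podEIPSNAT: the old pod's
+			// egress IP SNAT was removed by deleteLogicalPort, while its stale
+			// reroute policy is still in place.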
+			gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod[1:]))
+
+			ginkgo.By("trigger a forced retry and ensure deletion of oldPod and creation of newPod are successful")
+			// let us add back the annotation to the oldPod which is being retried to make deletion a success
+			podKey, err := retry.GetResourceKey(egressPod1)
+			gomega.Expect(err).NotTo(gomega.HaveOccurred())
+			retry.CheckRetryObjectEventually(podKey, true, fakeOvn.controller.retryEgressIPPods)
+			retryOldObj := retry.GetOldObjFromRetryObj(podKey, fakeOvn.controller.retryEgressIPPods)
+			//fakeOvn.controller.retryEgressIPPods.retryEntries.LoadOrStore(podKey, &RetryObjEntry{backoffSec: 1})
+			pod, _ := retryOldObj.(*v1.Pod)
+			pod.Annotations = oldAnnotation
+			fakeOvn.controller.retryEgressIPPods.RequestRetryObjs()
+			// there should also be no entry for this pod in the retry cache
+			gomega.Eventually(func() bool {
+				return retry.CheckRetryObj(podKey, fakeOvn.controller.retryEgressIPPods)
+			}, retry.RetryObjInterval+time.Second).Should(gomega.BeFalse())
+
+			// ensure that egressIP setup is being done with the new pod's information from logicalPortCache
+			podReRoutePolicy.Match = fmt.Sprintf("ip4.src == %s", newEgressPodIP)
+			podEIPSNAT.LogicalIP = newEgressPodIP
+			node1GR.Nat = []string{podEIPSNAT.UUID}
+			gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod[:len(finalDatabaseStatewithPod)-1]))

 			return nil
 		}
@@ -2582,274 +3984,618 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() {
 		gomega.Expect(err).NotTo(gomega.HaveOccurred())
 	})

-	ginkgo.It("should remove OVN pod egress setup when EgressIP stops matching", func() {
-		app.Action = func(ctx *cli.Context) error {
-
-			egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d")
-
-			egressPod := *newPodWithLabels(namespace, podName, node1Name, podV6IP, egressPodLabel)
-			egressNamespace := newNamespaceWithLabels(namespace, egressPodLabel)
-
-			node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"})
-			node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"})
-
-			fakeOvn.startWithDBSetup(
-				libovsdbtest.TestSetup{
-					NBData: []libovsdbtest.TestData{
-						&nbdb.LogicalRouterPort{
-							UUID:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID",
-							Name:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name,
-							Networks: []string{nodeLogicalRouterIfAddrV6},
-						},
-						&nbdb.LogicalRouter{
-							Name: ovntypes.OVNClusterRouter,
-							UUID: ovntypes.OVNClusterRouter + "-UUID",
-						},
-						&nbdb.LogicalRouter{
-							Name: ovntypes.GWRouterPrefix + node1.name,
-							UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID",
-						},
-						&nbdb.LogicalRouter{
-							Name: ovntypes.GWRouterPrefix + node2.name,
-							UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID",
-							Nat:  nil,
+	ginkgotable.DescribeTable("egressIP pod managed by multiple objects, verify standby works well, verify syncPodAssignmentCache on restarts",
+		func(interconnect bool, node1Zone, node2Zone string) {
+			config.OVNKubernetesFeature.EnableInterconnect = interconnect
+			app.Action = func(ctx *cli.Context) error {
+
+				config.Gateway.DisableSNATMultipleGWs = true
+
+				egressIP1 := "192.168.126.25"
+				egressIP2 := "192.168.126.30"
+				egressIP3 := "192.168.126.35"
+				node1IPv4 := "192.168.126.12/24"
+				node2IPv4 := "192.168.126.13/24"
+
+				egressPod1 := *newPodWithLabels(namespace, podName, node1Name, "", egressPodLabel)
+				egressNamespace := newNamespace(namespace)
+				annotations := map[string]string{
+					"k8s.ovn.org/node-primary-ifaddr":             fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4),
+					"k8s.ovn.org/node-subnets":                    fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet),
+					"k8s.ovn.org/l3-gateway-config":               `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`,
+					"k8s.ovn.org/node-chassis-id":                 "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec",
+					"k8s.ovn.org/node-transit-switch-port-ifaddr": "{\"ipv4\":\"168.254.0.2/16\"}", // used only for ic=true test
+					"k8s.ovn.org/zone-name":                       node1Zone,                       // used only for ic=true test
+				}
+				labels := map[string]string{
+					"k8s.ovn.org/egress-assignable": "",
+				}
+				node1 := getNodeObj(node1Name, annotations, labels)
+				annotations = map[string]string{
+					"k8s.ovn.org/node-primary-ifaddr":             fmt.Sprintf("{\"ipv4\": \"%s\"}", node2IPv4),
+					"k8s.ovn.org/node-subnets":                    fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet),
+					"k8s.ovn.org/l3-gateway-config":               `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:50", "ip-address":"192.168.126.13/24", "next-hop":"192.168.126.1"}}`,
+					"k8s.ovn.org/node-chassis-id":                 "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec",
+					"k8s.ovn.org/node-transit-switch-port-ifaddr": "{\"ipv4\":\"168.254.0.3/16\"}", // used only for ic=true test
+					"k8s.ovn.org/zone-name":                       node2Zone,                       // used only for ic=true test
+				}
+				node2 := getNodeObj(node2Name, annotations, map[string]string{})
+				eIP1 := egressipv1.EgressIP{
+					ObjectMeta: newEgressIPMeta(egressIPName),
+					Spec: egressipv1.EgressIPSpec{
+						EgressIPs: []string{egressIP1, egressIP2},
+						PodSelector: metav1.LabelSelector{
+							MatchLabels: egressPodLabel,
+						},
+						NamespaceSelector: metav1.LabelSelector{
+							MatchLabels: map[string]string{
+								"name": egressNamespace.Name,
+							},
 						},
 					},
-				},
-				&v1.NamespaceList{
-					Items: []v1.Namespace{*egressNamespace},
-				},
-				&v1.PodList{
-					Items: []v1.Pod{egressPod},
-				},
-			)
-
-			i, n, _ := net.ParseCIDR(podV6IP + "/23")
-			n.IP = i
-			fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n})
-			fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1
-			fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2
-
-			eIP := egressipv1.EgressIP{
-				ObjectMeta: newEgressIPMeta(egressIPName),
-				Spec: egressipv1.EgressIPSpec{
-					EgressIPs: []string{
-						egressIP.String(),
+					Status: egressipv1.EgressIPStatus{
+						Items: []egressipv1.EgressIPStatusItem{},
 					},
-					PodSelector: metav1.LabelSelector{
-						MatchLabels: egressPodLabel,
+				}
+
+				eIP2 := egressipv1.EgressIP{
+					ObjectMeta: newEgressIPMeta(egressIPName2),
+					Spec: egressipv1.EgressIPSpec{
+						EgressIPs: []string{egressIP3},
+						PodSelector: metav1.LabelSelector{
+							MatchLabels: egressPodLabel,
+						},
+						NamespaceSelector: metav1.LabelSelector{
+							MatchLabels: map[string]string{
+								"name": egressNamespace.Name,
+							},
+						},
 					},
-					NamespaceSelector: metav1.LabelSelector{
-						MatchLabels: egressPodLabel,
+					Status: egressipv1.EgressIPStatus{
+						Items: []egressipv1.EgressIPStatusItem{},
 					},
-				},
-			}
-
-			err := fakeOvn.controller.WatchEgressIPNamespaces()
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-			err = fakeOvn.controller.WatchEgressIPPods()
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-			err = fakeOvn.controller.WatchEgressIP()
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
+				}

-			_, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{})
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
+				node1Switch := &nbdb.LogicalSwitch{
+					UUID: node1.Name + "-UUID",
+					Name: node1.Name,
+				}
+				node1GR := &nbdb.LogicalRouter{
+					Name: ovntypes.GWRouterPrefix + node1.Name,
+					UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID",
+				}
+				node2GR := &nbdb.LogicalRouter{
+					Name: ovntypes.GWRouterPrefix + node2.Name,
+					UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID",
+				}
+				node1LSP := &nbdb.LogicalSwitchPort{
+					UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID",
+					Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name,
+					Type: "router",
+					Options: map[string]string{
+						"router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name,
+					},
+				}
+				node2LSP := &nbdb.LogicalSwitchPort{
+					UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID",
+					Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name,
+					Type: "router",
+					Options: map[string]string{
+						"router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name,
+					},
+				}

-			gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1))
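+				// Seed the NB DB with both nodes' gateway routers, their external
+				// switch ports, join-switch router ports and node switches.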
+				fakeOvn.startWithDBSetup(
+					libovsdbtest.TestSetup{
+						NBData: []libovsdbtest.TestData{
+							&nbdb.LogicalRouter{
+								Name: ovntypes.OVNClusterRouter,
+								UUID: ovntypes.OVNClusterRouter + "-UUID",
+							},
+							node1GR, node2GR,
+							node1LSP, node2LSP,
+							&nbdb.LogicalRouterPort{
+								UUID:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID",
+								Name:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name,
+								Networks: []string{"100.64.0.3/29"},
+							},
+							&nbdb.LogicalRouterPort{
+								UUID:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID",
+								Name:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name,
+								Networks: []string{"100.64.0.2/29"},
+							},
+							node1Switch,
+							&nbdb.LogicalSwitch{
+								UUID: node2.Name + "-UUID",
+								Name: node2.Name,
+							},
+						},
+					},
+					&egressipv1.EgressIPList{
+						Items: []egressipv1.EgressIP{eIP1, eIP2},
+					},
+					&v1.NodeList{
+						Items: []v1.Node{node1, node2},
+					},
+					&v1.NamespaceList{
+						Items: []v1.Namespace{*egressNamespace},
+					},
+					&v1.PodList{
+						Items: []v1.Pod{egressPod1},
+					},
+				)

-			expectedNatLogicalPort := "k8s-node2"
-			expectedDatabaseState := []libovsdbtest.TestData{
-				&nbdb.LogicalRouterPolicy{
+				// we don't know the real switch UUID in the db, but it can be found by name
+				sw1UUID := getLogicalSwitchUUID(fakeOvn.controller.nbClient, node1.Name)
+				sw2UUID := getLogicalSwitchUUID(fakeOvn.controller.nbClient, node2.Name)
+				fakeOvn.controller.lsManager.AddSwitch(node1.Name, sw1UUID, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)})
+				fakeOvn.controller.lsManager.AddSwitch(node2.Name, sw2UUID, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)})
+				err := fakeOvn.controller.WatchPods()
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())
+				err = fakeOvn.controller.WatchEgressIPNamespaces()
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())
+				err = fakeOvn.controller.WatchEgressIPPods()
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())
+				err = fakeOvn.controller.WatchEgressNodes()
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())
+				err = fakeOvn.controller.WatchEgressIP()
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())
+
+				fakeOvn.patchEgressIPObj(node1Name, egressIP1)
+
+				// NOTE: Cluster manager is the one who patches the egressIP object.
+				// For the sake of unit testing egressip zone controller we need to patch egressIP object manually
+				// There are tests in cluster-manager package covering the patch logic.
+				status := []egressipv1.EgressIPStatusItem{
+					{
+						Node:     node1Name,
+						EgressIP: egressIP3,
+					},
+				}
+				err = fakeOvn.controller.patchReplaceEgressIPStatus(egressIPName2, status)
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())
+
+				egressPodPortInfo, err := fakeOvn.controller.logicalPortCache.get(&egressPod1, types.DefaultNetworkName)
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())
+				ePod, err := fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod1.Namespace).Get(context.TODO(), egressPod1.Name, metav1.GetOptions{})
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())
+				egressPodIP, err := util.GetPodIPsOfNetwork(ePod, &util.DefaultNetInfo{})
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())
+				egressNetPodIP, _, err := net.ParseCIDR(egressPodPortInfo.ips[0].String())
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())
+				gomega.Expect(egressNetPodIP.String()).To(gomega.Equal(egressPodIP[0].String()))
+				gomega.Expect(egressPodPortInfo.expires.IsZero()).To(gomega.BeTrue())
+				podAddr := fmt.Sprintf("%s %s", egressPodPortInfo.mac.String(), egressPodIP[0].String())
+
+				// Ensure first egressIP object is assigned; since only node1 is an egressNode, only 1 IP will be assigned, the other will be pending
+				gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1))
+				gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(1))
+				egressIPs1, nodes1 := getEgressIPStatus(egressIPName)
+				gomega.Expect(nodes1[0]).To(gomega.Equal(node1.Name))
+				gomega.Expect(egressIPs1[0]).To(gomega.Equal(egressIP1))
+
+				// Ensure second egressIP object is also assigned to node1, but no OVN config will be done for this
+				gomega.Eventually(getEgressIPStatusLen(egressIPName2)).Should(gomega.Equal(1))
+				egressIPs2, nodes2 := getEgressIPStatus(egressIPName2)
+				gomega.Expect(nodes2[0]).To(gomega.Equal(node1.Name))
+				gomega.Expect(egressIPs2[0]).To(gomega.Equal(egressIP3))
+				recordedEvent := <-fakeOvn.fakeRecorder.Events
+				gomega.Expect(recordedEvent).To(gomega.ContainSubstring("EgressIP object egressip-2 will not be configured for pod egressip-namespace_egress-pod since another egressIP object egressip is serving it, this is undefined"))
+
+				pas := getPodAssignmentState(&egressPod1)
+				gomega.Expect(pas).NotTo(gomega.BeNil())
+
+				assignedEIP := egressIPs1[0]
+				gomega.Expect(pas.egressIPName).To(gomega.Equal(egressIPName))
+				eip1Obj, err := fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP1.Name, metav1.GetOptions{})
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())
+				gomega.Expect(pas.egressStatuses[eip1Obj.Status.Items[0]]).To(gomega.Equal(""))
+				gomega.Expect(pas.standbyEgressIPNames.Has(egressIPName2)).To(gomega.BeTrue())
+
+				podEIPSNAT := &nbdb.NAT{
+					UUID:       "egressip-nat-UUID1",
+					LogicalIP:  egressPodIP[0].String(),
+					ExternalIP: assignedEIP,
+					ExternalIDs: map[string]string{
+						"name": pas.egressIPName,
+					},
+					Type:        nbdb.NATTypeSNAT,
+					LogicalPort: utilpointer.String("k8s-node1"),
+					Options: map[string]string{
+						"stateless": "false",
+					},
+				}
+				podReRoutePolicy := &nbdb.LogicalRouterPolicy{
 					Priority: types.EgressIPReroutePriority,
-					Match:    fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP),
+					Match:    fmt.Sprintf("ip4.src == %s", egressPodIP[0].String()),
 					Action:   nbdb.LogicalRouterPolicyActionReroute,
-					Nexthops: nodeLogicalRouterIPv6,
+					Nexthops: nodeLogicalRouterIPv4,
 					ExternalIDs: map[string]string{
-						"name": eIP.Name,
+						"name": pas.egressIPName,
 					},
-					UUID: "reroute-UUID",
-				},
-				&nbdb.LogicalRouter{
-					Name: ovntypes.OVNClusterRouter,
-					UUID: ovntypes.OVNClusterRouter + "-UUID",
-					Policies: []string{"reroute-UUID"},
-				},
-				&nbdb.LogicalRouterPort{
-					UUID:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID",
-					Name:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name,
-					Networks: []string{nodeLogicalRouterIfAddrV6},
-				},
-				&nbdb.NAT{
-					UUID:       "egressip-nat-UUID",
-					LogicalIP:  podV6IP,
-					ExternalIP: egressIP.String(),
+					UUID: "reroute-UUID1",
+				}
+				node1GR.Nat = []string{"egressip-nat-UUID1"}
+				node1LSP.Options = map[string]string{
+					"router-port":               types.GWRouterToExtSwitchPrefix + "GR_" + node1Name,
+					"nat-addresses":             "router",
+					"exclude-lb-vips-from-garp": "true",
+				}
+				if node1Zone != node2Zone && node1Zone == "remote" {
+					// GARP for remote zones is taken care of by the remote controller
+					node1LSP.Options = map[string]string{
+						"router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name,
+					}
+				}
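+				// node2's external switch port carries the GARP options only when
+				// node2 is local to this controller (non-IC or global zone).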
+				if !interconnect || node2Zone == "global" {
+					node2LSP.Options = map[string]string{
+						"router-port":               types.GWRouterToExtSwitchPrefix + "GR_" + node2Name,
+						"nat-addresses":             "router",
+						"exclude-lb-vips-from-garp": "true",
+					}
+				}
+				expectedDatabaseStatewithPod := []libovsdbtest.TestData{
+					podEIPSNAT,
+					podReRoutePolicy,
+					&nbdb.LogicalRouterPolicy{
+						Priority: types.DefaultNoRereoutePriority,
+						Match:    "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14",
+						Action:   nbdb.LogicalRouterPolicyActionAllow,
+						UUID:     "no-reroute-UUID",
+					},
+					&nbdb.LogicalRouterPolicy{
+						Priority: types.DefaultNoRereoutePriority,
+						Match:    fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet),
+						Action:   nbdb.LogicalRouterPolicyActionAllow,
+						UUID:     "no-reroute-service-UUID",
+					},
+					&nbdb.LogicalRouter{
+						Name:     ovntypes.OVNClusterRouter,
+						UUID:     ovntypes.OVNClusterRouter + "-UUID",
+						Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1"},
+					},
+					node1GR, node2GR,
+					node1LSP, node2LSP,
+					&nbdb.LogicalRouterPort{
+						UUID:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID",
+						Name:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name,
+						Networks: []string{"100.64.0.2/29"},
+					},
+					&nbdb.LogicalRouterPort{
+						UUID:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID",
+						Name:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name,
+						Networks: []string{"100.64.0.3/29"},
+					},
+					node1Switch,
+					&nbdb.LogicalSwitch{
+						UUID: node2.Name + "-UUID",
+						Name: node2.Name,
+					},
+				}
+				podLSP := &nbdb.LogicalSwitchPort{
+					UUID:      util.GetLogicalPortName(egressPod1.Namespace, egressPod1.Name) + "-UUID",
+					Name:      util.GetLogicalPortName(egressPod1.Namespace, egressPod1.Name),
+					Addresses: []string{podAddr},
 					ExternalIDs: map[string]string{
-						"name": egressIPName,
+						"pod":       "true",
+						"namespace": egressPod1.Namespace,
 					},
+					Options: map[string]string{
+						"requested-chassis": egressPod1.Spec.NodeName,
+						"iface-id-ver":      egressPod1.Name,
+					},
+					PortSecurity: []string{podAddr},
+				}
+				node1Switch.Ports = []string{podLSP.UUID}
+				finalDatabaseStatewithPod := append(expectedDatabaseStatewithPod, podLSP)
+				if node1Zone == "remote" {
+					// policy is not visible since podNode is in remote zone
+					finalDatabaseStatewithPod[4].(*nbdb.LogicalRouter).Policies = []string{"no-reroute-UUID", "no-reroute-service-UUID"}
+					finalDatabaseStatewithPod = finalDatabaseStatewithPod[2:]
+					podEIPSNAT.ExternalIP = "192.168.126.12" // EIP SNAT is not visible since podNode is remote, SNAT towards nodeIP is visible.
+					podEIPSNAT.LogicalPort = nil
+					podNodeSNAT := &nbdb.NAT{
+						UUID:       "node-nat-UUID1",
+						LogicalIP:  egressPodIP[0].String(),
+						ExternalIP: "192.168.126.12",
+						Type:       nbdb.NATTypeSNAT,
+						Options: map[string]string{
+							"stateless": "false",
+						},
+					}
+					finalDatabaseStatewithPod = append(finalDatabaseStatewithPod, podNodeSNAT)
+					node1GR.Nat = []string{"node-nat-UUID1"}
+				}
+
+				gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod))
+
+				// Make second node egressIP assignable
+				node2.Labels = map[string]string{
+					"k8s.ovn.org/egress-assignable": "",
+				}
+				_, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{})
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())
+
+				// NOTE: Cluster manager is the one who patches the egressIP object.
+				// For the sake of unit testing egressip zone controller we need to patch egressIP object manually
+				// There are tests in cluster-manager package covering the patch logic.
+				status = []egressipv1.EgressIPStatusItem{
+					{
+						Node:     node1Name,
+						EgressIP: egressIP1,
+					},
+					{
+						Node:     node2Name,
+						EgressIP: egressIP2,
+					},
+				}
+				err = fakeOvn.controller.patchReplaceEgressIPStatus(egressIPName, status)
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())
+
+				// ensure secondIP from first object gets assigned to node2
+				gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(2))
+				egressIPs1, nodes1 = getEgressIPStatus(egressIPName)
+				gomega.Expect(nodes1[1]).To(gomega.Equal(node2.Name))
+				gomega.Expect(egressIPs1[1]).To(gomega.Equal(egressIP2))
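+				// The second IP of the first object lands on node2: model the
+				// corresponding SNAT on node2's gateway router.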
+				podEIPSNAT2 := &nbdb.NAT{
+					UUID:       "egressip-nat-UUID2",
+					LogicalIP:  egressPodIP[0].String(),
+					ExternalIP: egressIPs1[1],
+					ExternalIDs: map[string]string{
+						"name": pas.egressIPName,
 					},
 					Type:        nbdb.NATTypeSNAT,
-					LogicalPort: &expectedNatLogicalPort,
+					LogicalPort: utilpointer.String("k8s-node2"),
 					Options: map[string]string{
 						"stateless": "false",
 					},
-				},
-				&nbdb.LogicalRouter{
-					Name: ovntypes.GWRouterPrefix + node1.name,
-					UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID",
-				},
-				&nbdb.LogicalRouter{
-					Name: ovntypes.GWRouterPrefix + node2.name,
-					UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID",
-					Nat:  []string{"egressip-nat-UUID"},
-				},
-			}
-			gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState))
+				}
+				podReRoutePolicy.Nexthops = []string{nodeLogicalRouterIPv4[0], node2LogicalRouterIPv4[0]}
+				if node2Zone == "remote" {
+					// the policy reroute will have its second nexthop as transit switchIP
+					// so the one with join switchIP is where podNode == egressNode and one with transitIP is where podNode != egressNode
+					podReRoutePolicy.Nexthops = []string{"100.64.0.2", "168.254.0.3"}
+				}
+				if node1Zone == "remote" {
+					finalDatabaseStatewithPod = append(finalDatabaseStatewithPod, getReRouteStaticRoute(egressPodIP[0].String(), node2LogicalRouterIPv4[0]))
+					finalDatabaseStatewithPod[2].(*nbdb.LogicalRouter).StaticRoutes = []string{"reroute-static-route-UUID"}
+				}
+				if !interconnect || node2Zone == "global" {
+					node2GR.Nat = []string{"egressip-nat-UUID2"}
+					finalDatabaseStatewithPod = append(finalDatabaseStatewithPod, podEIPSNAT2)
+				}

-			egressIPs, nodes := getEgressIPStatus(eIP.Name)
-			gomega.Expect(nodes[0]).To(gomega.Equal(node2.name))
-			gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String()))
+				gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod))

-			namespaceUpdate := newNamespace(namespace)
+				// check the state of the cache for podKey
+				pas = getPodAssignmentState(&egressPod1)
+				gomega.Expect(pas).NotTo(gomega.BeNil())

-			_, err = fakeOvn.fakeClient.KubeClient.CoreV1().Namespaces().Update(context.TODO(), namespaceUpdate, metav1.UpdateOptions{})
-			gomega.Expect(err).ToNot(gomega.HaveOccurred())
-			gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1))
+				gomega.Expect(pas.egressIPName).To(gomega.Equal(egressIPName))
+				eip1Obj, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP1.Name, metav1.GetOptions{})
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())
+				gomega.Expect(pas.egressStatuses[eip1Obj.Status.Items[0]]).To(gomega.Equal(""))
+				gomega.Expect(pas.egressStatuses[eip1Obj.Status.Items[1]]).To(gomega.Equal(""))
+				gomega.Expect(pas.standbyEgressIPNames.Has(egressIPName2)).To(gomega.BeTrue())

-			expectedDatabaseState = []libovsdbtest.TestData{
-				&nbdb.LogicalRouter{
-					Name: ovntypes.OVNClusterRouter,
-					UUID: ovntypes.OVNClusterRouter + "-UUID",
-				},
-				&nbdb.LogicalRouterPort{
-					UUID:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID",
-					Name:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name,
-					Networks: []string{nodeLogicalRouterIfAddrV6},
-				},
-				&nbdb.LogicalRouter{
-					Name: ovntypes.GWRouterPrefix + node1.name,
-					UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID",
-				},
-				&nbdb.LogicalRouter{
-					Name: ovntypes.GWRouterPrefix + node2.name,
-					UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID",
-					Nat:  nil,
-				},
-			}
-			gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState))
-			return nil
-		}
+				// let's test syncPodAssignmentCache works as expected! Nuke the podAssignment cache first
+				fakeOvn.controller.eIPC.podAssignmentMutex.Lock()
+				fakeOvn.controller.eIPC.podAssignment = make(map[string]*podAssignmentState) // replicates controller startup state
+				fakeOvn.controller.eIPC.podAssignmentMutex.Unlock()

-		err := app.Run([]string{app.Name})
-		gomega.Expect(err).NotTo(gomega.HaveOccurred())
-	})
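+				// Rebuild the assignment cache from the informer state, as the
+				// controller would on startup.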
+				egressIPCache, err := fakeOvn.controller.generateCacheForEgressIP()
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())
+				err = fakeOvn.controller.syncPodAssignmentCache(egressIPCache)
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())

-	ginkgo.It("should not remove OVN pod egress setup when EgressIP stops matching, but pod never had any IP to begin with", func() {
-		app.Action = func(ctx *cli.Context) error {
+				pas = getPodAssignmentState(&egressPod1)
+				gomega.Expect(pas).NotTo(gomega.BeNil())
+				gomega.Expect(pas.egressIPName).To(gomega.Equal(egressIPName))
+				gomega.Expect(pas.egressStatuses).To(gomega.Equal(map[egressipv1.EgressIPStatusItem]string{}))
+				gomega.Expect(pas.standbyEgressIPNames.Has(egressIPName2)).To(gomega.BeTrue())
+
+				// reset egressStatuses for rest of the test to progress correctly
+				fakeOvn.controller.eIPC.podAssignmentMutex.Lock()
+				fakeOvn.controller.eIPC.podAssignment[getPodKey(&egressPod1)].egressStatuses[eip1Obj.Status.Items[0]] = ""
+				fakeOvn.controller.eIPC.podAssignment[getPodKey(&egressPod1)].egressStatuses[eip1Obj.Status.Items[1]] = ""
+				fakeOvn.controller.eIPC.podAssignmentMutex.Unlock()
+
+				// delete the standby egressIP object to make sure the cache is updated
+				err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Delete(context.TODO(), egressIPName2, metav1.DeleteOptions{})
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())

-			egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d")
+				gomega.Eventually(func() bool {
+					pas := getPodAssignmentState(&egressPod1)
+					gomega.Expect(pas).NotTo(gomega.BeNil())
+					return pas.standbyEgressIPNames.Has(egressIPName2)
+				}).Should(gomega.BeFalse())
+				gomega.Expect(getPodAssignmentState(&egressPod1).egressIPName).To(gomega.Equal(egressIPName))

-			egressPod := *newPodWithLabels(namespace, podName, node1Name, "", egressPodLabel)
-			egressNamespace := newNamespaceWithLabels(namespace, egressPodLabel)
-			fakeOvn.startWithDBSetup(clusterRouterDbSetup,
-				&v1.NamespaceList{
-					Items: []v1.Namespace{*egressNamespace},
-				},
-				&v1.PodList{
-					Items: []v1.Pod{egressPod},
-				},
-			)
+				// add back the standby egressIP object
+				_, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP2, metav1.CreateOptions{})
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())

-			node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"})
-			node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"})
+				// NOTE: Cluster manager is the one who patches the egressIP object.
+				// For the sake of unit testing egressip zone controller we need to patch egressIP object manually
+				// There are tests in cluster-manager package covering the patch logic.
+				status = []egressipv1.EgressIPStatusItem{
+					{
+						Node:     node1Name,
+						EgressIP: egressIP3,
+					},
+				}
+				err = fakeOvn.controller.patchReplaceEgressIPStatus(egressIPName2, status)
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())

-			fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1
-			fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2
+				gomega.Eventually(func() bool {
+					pas := getPodAssignmentState(&egressPod1)
+					gomega.Expect(pas).NotTo(gomega.BeNil())
+					return pas.standbyEgressIPNames.Has(egressIPName2)
+				}).Should(gomega.BeTrue())
+				gomega.Expect(getPodAssignmentState(&egressPod1).egressIPName).To(gomega.Equal(egressIPName))
+				gomega.Eventually(func() string {
+					return <-fakeOvn.fakeRecorder.Events
+				}).Should(gomega.ContainSubstring("EgressIP object egressip-2 will not be configured for pod egressip-namespace_egress-pod since another egressIP object egressip is serving it, this is undefined"))
+
+				gomega.Eventually(getEgressIPStatusLen(egressIPName2)).Should(gomega.Equal(1))
+				egressIPs2, nodes2 = getEgressIPStatus(egressIPName2)
+				gomega.Expect(egressIPs2[0]).To(gomega.Equal(egressIP3))
+				assignedNodeForEIPObj2 := nodes2[0]
+
+				// Delete the IP from object1 that was on node1 and ensure standby is not taking over
+				eIPUpdate, err := fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP1.Name, metav1.GetOptions{})
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())

-			eIP := egressipv1.EgressIP{
-				ObjectMeta: newEgressIPMeta("egressip"),
-				Spec: egressipv1.EgressIPSpec{
-					EgressIPs: []string{
-						egressIP.String(),
-					},
-					PodSelector: metav1.LabelSelector{
-						MatchLabels: egressPodLabel,
-					},
-					NamespaceSelector: metav1.LabelSelector{
-						MatchLabels: egressPodLabel,
-					},
-				},
-			}
+				ipOnNode1 := assignedEIP
+				var ipOnNode2 string
+				if ipOnNode1 == egressIP1 {
+					ipOnNode2 = egressIP2
+				} else {
+					ipOnNode2 = egressIP1
+				}
+				eIPUpdate.Spec.EgressIPs = []string{ipOnNode2}
+				_, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Update(context.TODO(), eIPUpdate, metav1.UpdateOptions{})
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())
+
+				fakeOvn.patchEgressIPObj(node2Name, ipOnNode2)
+
+				gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1))
+				egressIPs1, nodes1 = getEgressIPStatus(egressIPName)
+				gomega.Expect(nodes1[0]).To(gomega.Equal(node2.Name))
+				gomega.Expect(egressIPs1[0]).To(gomega.Equal(ipOnNode2))
+
+				// check if the setup for firstIP from object1 is deleted properly
+				podReRoutePolicy.Nexthops = node2LogicalRouterIPv4
+				if node2Zone == "remote" {
+					// the policy reroute will have its second nexthop as transit switchIP
+					// so the one with join switchIP is where podNode == egressNode and one with transitIP is where podNode != egressNode
+					podReRoutePolicy.Nexthops = []string{"168.254.0.3"}
+				}
+				podNodeSNAT := &nbdb.NAT{
+					UUID:       "node-nat-UUID1",
+					LogicalIP:  egressPodIP[0].String(),
+					ExternalIP: "192.168.126.12", // adds back SNAT to nodeIP
+					Type:       nbdb.NATTypeSNAT,
+					Options: map[string]string{
+						"stateless": "false",
+					},
+				}
+				if node1Zone != "remote" {
+					node1GR.Nat = []string{podNodeSNAT.UUID}
+					finalDatabaseStatewithPod = append(finalDatabaseStatewithPod, podNodeSNAT)
+					gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod[1:]))
+				} else {
+					gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod))
+				}

-			err := fakeOvn.controller.WatchEgressIPNamespaces()
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-			err = fakeOvn.controller.WatchEgressIPPods()
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
-			err = fakeOvn.controller.WatchEgressIP()
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
+				gomega.Eventually(func() bool {
+					pas := getPodAssignmentState(&egressPod1)
+					gomega.Expect(pas).NotTo(gomega.BeNil())
+					return pas.standbyEgressIPNames.Has(egressIPName2)
+				}).Should(gomega.BeTrue())
+				gomega.Expect(getPodAssignmentState(&egressPod1).egressIPName).To(gomega.Equal(egressIPName))

-			_, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{})
-			gomega.Expect(err).NotTo(gomega.HaveOccurred())
+				// delete the first egressIP object and make sure the cache is updated
+				err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Delete(context.TODO(), egressIPName, metav1.DeleteOptions{})
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())

-			gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1))
+				// ensure standby takes over and we do the setup for it in OVN DB
+				gomega.Eventually(func() bool {
+					pas := getPodAssignmentState(&egressPod1)
+					gomega.Expect(pas).NotTo(gomega.BeNil())
+					return pas.standbyEgressIPNames.Has(egressIPName2)
+				}).Should(gomega.BeFalse())
+				gomega.Expect(getPodAssignmentState(&egressPod1).egressIPName).To(gomega.Equal(egressIPName2))
+
+				finalDatabaseStatewithPod = expectedDatabaseStatewithPod
+				finalDatabaseStatewithPod = append(expectedDatabaseStatewithPod, podLSP)
+				podEIPSNAT.ExternalIP = egressIP3
+				podEIPSNAT.ExternalIDs = map[string]string{
+					"name": egressIPName2,
+				}
+				podReRoutePolicy.ExternalIDs = map[string]string{
+					"name": egressIPName2,
+				}
+				if assignedNodeForEIPObj2 == node2.Name {
+					podEIPSNAT.LogicalPort = utilpointer.String("k8s-node2")
+					finalDatabaseStatewithPod = append(finalDatabaseStatewithPod, podNodeSNAT)
+					node1GR.Nat = []string{podNodeSNAT.UUID}
+					node2GR.Nat = []string{podEIPSNAT.UUID}
+				}
+				if assignedNodeForEIPObj2 == node1.Name {
+					podReRoutePolicy.Nexthops = nodeLogicalRouterIPv4
+					node1GR.Nat = []string{podEIPSNAT.UUID}
+					node2GR.Nat = []string{}
+				}
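+				// When the pod's node is in the remote zone, the reroute policy and
+				// egress SNAT live in the other zone's DB; drop them from the
+				// expected local state and expect the SNAT towards the node IP.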
+				if node1Zone == "remote" {
+					// policy is not visible since podNode is in remote zone
+					finalDatabaseStatewithPod[4].(*nbdb.LogicalRouter).Policies = []string{"no-reroute-UUID", "no-reroute-service-UUID"}
+					finalDatabaseStatewithPod = finalDatabaseStatewithPod[2:]
+					podEIPSNAT.ExternalIP = "192.168.126.12" // EIP SNAT is not visible since podNode is remote, SNAT towards nodeIP is visible.
+					podEIPSNAT.LogicalPort = nil
+					finalDatabaseStatewithPod = append(finalDatabaseStatewithPod, podNodeSNAT)
+					node1GR.Nat = []string{"node-nat-UUID1"}
+					finalDatabaseStatewithPod[2].(*nbdb.LogicalRouter).StaticRoutes = []string{}
+				}
+				gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod))

-			egressIPs, nodes := getEgressIPStatus(eIP.Name)
-			gomega.Expect(nodes[0]).To(gomega.Equal(node2.name))
-			gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String()))
+				// delete the second egressIP object to make sure the cache is updated; podKey should be gone since nothing is managing it anymore
+				err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Delete(context.TODO(), egressIPName2, metav1.DeleteOptions{})
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())

-			namespaceUpdate := newNamespace(namespace)
+				gomega.Eventually(func() bool {
+					return getPodAssignmentState(&egressPod1) != nil
+				}).Should(gomega.BeFalse())

-			_, err = fakeOvn.fakeClient.KubeClient.CoreV1().Namespaces().Update(context.TODO(), namespaceUpdate, metav1.UpdateOptions{})
-			gomega.Expect(err).ToNot(gomega.HaveOccurred())
-			gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1))
-			return nil
-		}
+				// let's test syncPodAssignmentCache works as expected! Nuke the podAssignment cache first
+				fakeOvn.controller.eIPC.podAssignmentMutex.Lock()
+				fakeOvn.controller.eIPC.podAssignment = make(map[string]*podAssignmentState) // replicates controller startup state
+				fakeOvn.controller.eIPC.podAssignmentMutex.Unlock()
+
+				egressIPCache, err = fakeOvn.controller.generateCacheForEgressIP()
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())
+				err = fakeOvn.controller.syncPodAssignmentCache(egressIPCache)
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())
+
+				// we don't have any egressIPs, so cache is nil
+				gomega.Eventually(func() bool {
+					return getPodAssignmentState(&egressPod1) != nil
+				}).Should(gomega.BeFalse())
+
+				return nil
+			}
+
+			err := app.Run([]string{app.Name})
+			gomega.Expect(err).NotTo(gomega.HaveOccurred())
+		},
+		ginkgotable.Entry("interconnect disabled; non-ic - single zone setup", false, "global", "global"),
+		ginkgotable.Entry("interconnect enabled; node1 and node2 in global zones", true, "global", "global"),
+		// will showcase localzone setup - master is in pod's zone where pod's reroute policy towards egressNode will be done.
+		// NOTE: SNAT won't be visible because it's in remote zone
+		ginkgotable.Entry("interconnect enabled; node1 in global and node2 in remote zones", true, "global", "remote"),
+		// will showcase localzone setup - master is in egress node's zone where pod's SNAT policy and static route will be done.
+		// NOTE: reroute policy won't be visible because it's in remote zone (pod is in remote zone)
+		ginkgotable.Entry("interconnect enabled; node1 in remote and node2 in global zones", true, "remote", "global"),
+	)

-	err := app.Run([]string{app.Name})
-	gomega.Expect(err).NotTo(gomega.HaveOccurred())
-	})
 })

-ginkgo.Context("on EgressIP UPDATE", func() {
-	ginkgo.It("should update OVN on EgressIP .spec.egressips change", func() {
-		app.Action = func(ctx *cli.Context) error {
+ginkgo.Context("WatchEgressNodes running with WatchEgressIP", func() {

-			egressIP1 := "192.168.126.101"
-			egressIP2 := "192.168.126.102"
-			egressIP3 := "192.168.126.103"
-			node1IPv4 := "192.168.126.202/24"
-			node2IPv4 := "192.168.126.51/24"
+	ginkgo.It("should treat un-assigned EgressIPs when it is tagged", func() {
+		app.Action = func(ctx *cli.Context) error {

-			egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel)
-			egressNamespace := newNamespace(namespace)
+			egressIP := "192.168.126.101"
+			nodeIPv4 := "192.168.126.51/24"
+			nodeIPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64"

-			node1 := v1.Node{
+			node := v1.Node{
 				ObjectMeta: metav1.ObjectMeta{
 					Name: node1Name,
 					Annotations: map[string]string{
-						"k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""),
-						"k8s.ovn.org/node-subnets":        fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet),
-					},
-					Labels: map[string]string{
-						"k8s.ovn.org/egress-assignable": "",
-					},
-				},
-				Status: v1.NodeStatus{
-					Conditions: []v1.NodeCondition{
-						{
-							Type:   v1.NodeReady,
-							Status: v1.ConditionTrue,
-						},
-					},
-				},
-			}
-			node2 := v1.Node{
-				ObjectMeta: metav1.ObjectMeta{
-					Name: node2Name,
-					Annotations: map[string]string{
-						"k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""),
-						"k8s.ovn.org/node-subnets":        fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet),
-					},
-					Labels: map[string]string{
-						"k8s.ovn.org/egress-assignable": "",
+						"k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", nodeIPv4, nodeIPv6),
+						"k8s.ovn.org/node-subnets":        fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet),
 					},
 				},
 				Status: v1.NodeStatus{
@@ -2865,15 +4611,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() {
 			eIP := egressipv1.EgressIP{
 				ObjectMeta: newEgressIPMeta(egressIPName),
 				Spec: egressipv1.EgressIPSpec{
-					EgressIPs: []string{egressIP1, egressIP2},
-					PodSelector: metav1.LabelSelector{
-						MatchLabels: egressPodLabel,
-					},
-					NamespaceSelector: metav1.LabelSelector{
-						MatchLabels: map[string]string{
-							"name": egressNamespace.Name,
-						},
-					},
+					EgressIPs: []string{egressIP},
 				},
 				Status: egressipv1.EgressIPStatus{
 					Items: []egressipv1.EgressIPStatusItem{},
@@ -2883,60 +4621,31 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() {
 			fakeOvn.startWithDBSetup(
 				libovsdbtest.TestSetup{
 					NBData: []libovsdbtest.TestData{
-						&nbdb.LogicalRouterPort{
-							UUID:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID",
-							Name:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name,
-							Networks: []string{"100.64.0.3/29"},
-						},
-						&nbdb.LogicalRouterPort{
-							UUID:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID",
-							Name:     ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name,
-							Networks: []string{"100.64.0.2/29"},
-						},
 						&nbdb.LogicalRouter{
 							Name: ovntypes.OVNClusterRouter,
 							UUID: ovntypes.OVNClusterRouter + "-UUID",
 						},
 						&nbdb.LogicalRouter{
-
Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, + Name: ovntypes.GWRouterPrefix + node.Name, + UUID: ovntypes.GWRouterPrefix + node.Name + "-UUID", }, &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name, Type: "router", Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node.Name, }, }, }, }, - &v1.NodeList{ - Items: []v1.Node{node1, node2}, - }, - &v1.NamespaceList{ - Items: []v1.Namespace{*egressNamespace}, + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, }, - &v1.PodList{ - Items: []v1.Pod{egressPod}, + &v1.NodeList{ + Items: []v1.Node{node}, }) - i, n, _ := net.ParseCIDR(podV4IP + "/23") - n.IP = i - fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - err := fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIPPods() @@ -2946,22 +4655,12 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(2)) - egressIPs, nodes := getEgressIPStatus(eIP.Name) - assignmentNode1, assignmentNode2 := nodes[0], nodes[1] - assignedEgressIP1, assignedEgressIP2 := egressIPs[0], egressIPs[1] - - expectedNatLogicalPort1 := fmt.Sprintf("k8s-%s", assignmentNode1) - expectedNatLogicalPort2 := fmt.Sprintf("k8s-%s", assignmentNode2) expectedDatabaseState := []libovsdbtest.TestData{ &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "default-no-reroute-UUID", + UUID: "no-reroute-UUID", }, &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, @@ -2969,83 +4668,21 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Action: nbdb.LogicalRouterPolicyActionAllow, UUID: "no-reroute-service-UUID", }, - &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: []string{"100.64.0.2", "100.64.0.3"}, - ExternalIDs: map[string]string{ - "name": eIP.Name, - }, - UUID: "reroute-UUID", - }, - &nbdb.NAT{ - UUID: "egressip-nat-1-UUID", - LogicalIP: podV4IP, - ExternalIP: assignedEgressIP1, - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: 
nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort1, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.NAT{ - UUID: "egressip-nat-2-UUID", - LogicalIP: podV4IP, - ExternalIP: assignedEgressIP2, - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort2, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + assignmentNode1, - UUID: ovntypes.GWRouterPrefix + assignmentNode1 + "-UUID", - Nat: []string{"egressip-nat-1-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + assignmentNode2, - UUID: ovntypes.GWRouterPrefix + assignmentNode2 + "-UUID", - Nat: []string{"egressip-nat-2-UUID"}, - }, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{"100.64.0.3/29"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{"100.64.0.2/29"}, + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node.Name, + UUID: ovntypes.GWRouterPrefix + node.Name + "-UUID", }, &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name, Type: "router", Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node.Name, "nat-addresses": "router", "exclude-lb-vips-from-garp": "true", }, @@ -3053,29 +4690,29 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { } gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - latest, err := fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP.Name, metav1.GetOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - latest.Spec.EgressIPs = []string{egressIP3, egressIP2} - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Update(context.TODO(), latest, metav1.UpdateOptions{}) + gomega.Eventually(eIP.Status.Items).Should(gomega.HaveLen(0)) + + node.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node, metav1.UpdateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(func() []string { - egressIPs, _ = 
getEgressIPStatus(eIP.Name) - return egressIPs - }).Should(gomega.ConsistOf(egressIP3, egressIP2)) + fakeOvn.patchEgressIPObj(node1Name, egressIP) - egressIPs, nodes = getEgressIPStatus(eIP.Name) - assignmentNode1, assignmentNode2 = nodes[0], nodes[1] - assignedEgressIP1, assignedEgressIP2 = egressIPs[0], egressIPs[1] + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) - expectedNatLogicalPort1 = fmt.Sprintf("k8s-%s", assignmentNode1) - expectedNatLogicalPort2 = fmt.Sprintf("k8s-%s", assignmentNode2) + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) expectedDatabaseState = []libovsdbtest.TestData{ &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "default-no-reroute-UUID", + UUID: "no-reroute-UUID", }, &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, @@ -3083,83 +4720,21 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Action: nbdb.LogicalRouterPolicyActionAllow, UUID: "no-reroute-service-UUID", }, - &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: []string{"100.64.0.2", "100.64.0.3"}, - ExternalIDs: map[string]string{ - "name": eIP.Name, - }, - UUID: "reroute-UUID", - }, - &nbdb.NAT{ - UUID: "egressip-nat-1-UUID", - LogicalIP: podV4IP, - ExternalIP: assignedEgressIP1, - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort1, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.NAT{ - UUID: "egressip-nat-2-UUID", - LogicalIP: podV4IP, - ExternalIP: assignedEgressIP2, - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort2, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + assignmentNode1, - UUID: ovntypes.GWRouterPrefix + assignmentNode1 + "-UUID", - Nat: []string{"egressip-nat-1-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + assignmentNode2, - UUID: ovntypes.GWRouterPrefix + assignmentNode2 + "-UUID", - Nat: []string{"egressip-nat-2-UUID"}, - }, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{"100.64.0.3/29"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{"100.64.0.2/29"}, + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: 
map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node.Name, + UUID: ovntypes.GWRouterPrefix + node.Name + "-UUID", }, &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name, Type: "router", Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node.Name, "nat-addresses": "router", "exclude-lb-vips-from-garp": "true", }, @@ -3173,158 +4748,134 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - ginkgo.It("should delete and re-create", func() { + ginkgo.It("should result in error and event if specified egress IP is a cluster node IP", func() { app.Action = func(ctx *cli.Context) error { - egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") - updatedEgressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8ffd") - - egressPod := *newPodWithLabels(namespace, podName, node1Name, podV6IP, egressPodLabel) - egressNamespace := newNamespaceWithLabels(namespace, egressPodLabel) - - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) - + egressIP := "192.168.126.51" + node1IPv4 := "192.168.128.202/24" + node1IPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" + node2IPv4 := "192.168.126.51/24" + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, node1IPv6), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), + } + labels := map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + node1 := getNodeObj(node1Name, annotations, labels) + annotations = map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + } + node2 := getNodeObj(node2Name, annotations, labels) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + fakeOvn.startWithDBSetup( libovsdbtest.TestSetup{ NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, - Networks: []string{nodeLogicalRouterIfAddrV6}, - }, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", }, &nbdb.LogicalRouter{ - 
Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", - Nat: nil, + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, }, }, }, - &v1.NamespaceList{ - Items: []v1.Namespace{*egressNamespace}, - }, - &v1.PodList{ - Items: []v1.Pod{egressPod}, - }, - ) - - i, n, _ := net.ParseCIDR(podV6IP + "/23") - n.IP = i - fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{ - egressIP.String(), - }, - PodSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, - }, - NamespaceSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, - }, + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, }, - } + &v1.NodeList{ + Items: []v1.Node{node1, node2}, + }) err := fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() + err = fakeOvn.controller.WatchEgressNodes() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) + err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) - - expectedNatLogicalPort := "k8s-node2" expectedDatabaseState := []libovsdbtest.TestData{ &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv6, - ExternalIDs: map[string]string{ - "name": eIP.Name, - }, - UUID: "reroute-UUID", + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", }, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"reroute-UUID"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, - Networks: 
[]string{nodeLogicalRouterIfAddrV6}, - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID", - LogicalIP: podV6IP, - ExternalIP: egressIP.String(), - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort, - Options: map[string]string{ - "stateless": "false", - }, + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", - Nat: []string{"egressip-nat-UUID"}, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - egressIPs, nodes := getEgressIPStatus(eIP.Name) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) - - eIPUpdate, err := fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP.Name, metav1.GetOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - eIPUpdate.Spec = egressipv1.EgressIPSpec{ - EgressIPs: []string{ - updatedEgressIP.String(), + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", }, - PodSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, }, - NamespaceSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, }, } - - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Update(context.TODO(), eIPUpdate, metav1.UpdateOptions{}) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - gomega.Eventually(func() []string { - egressIPs, _ = getEgressIPStatus(eIP.Name) - return egressIPs - }).Should(gomega.ContainElement(updatedEgressIP.String())) - - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) return nil } @@ -3332,127 +4883,94 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - }) - - ginkgo.Context("WatchEgressNodes", func() { - - ginkgo.It("should populated egress node data as they are tagged `egress assignable` with variants of IPv4/IPv6", func() { + ginkgo.It("should re-assigned EgressIPs when more nodes get tagged if the first assignment attempt wasn't fully successful", func() { app.Action = func(ctx *cli.Context) error { - node1IPv4 := "192.168.128.202/24" - node1IPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" - node2IPv4 := "192.168.126.51/24" + egressIP1 := "192.168.126.25" + egressIP2 := "192.168.126.30" + node1IPv4 := 
"192.168.126.51/24" + node2IPv4 := "192.168.126.101/24" - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node1", - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, node1IPv6), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), } - node2 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node2", - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - }, + labels := map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + node1 := getNodeObj(node1Name, annotations, labels) + annotations = map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + } + labels = map[string]string{} + node2 := getNodeObj(node2Name, annotations, labels) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP1, egressIP2}, }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, }, } - fakeOvn.startWithDBSetup(libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", }, - }, - }, - }) - err := fakeOvn.controller.WatchEgressNodes() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(0)) - - node1.Labels = map[string]string{ - "k8s.ovn.org/egress-assignable": "", - } - - _, ip1V4Sub, err := net.ParseCIDR(node1IPv4) 
- _, ip1V6Sub, err := net.ParseCIDR(node1IPv6) - _, ip2V4Sub, err := net.ParseCIDR(node2IPv4) + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, + }, + }, + }, + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1, node2}, + }) - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node1, metav1.CreateOptions{}) + err := fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache[node1.Name].egressIPConfig.V4.Net).To(gomega.Equal(ip1V4Sub)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache[node1.Name].egressIPConfig.V6.Net).To(gomega.Equal(ip1V6Sub)) - - node2.Labels = map[string]string{ - "k8s.ovn.org/egress-assignable": "", - } - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node2, metav1.CreateOptions{}) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache[node2.Name].egressIPConfig.V4.Net).To(gomega.Equal(ip2V4Sub)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache[node1.Name].egressIPConfig.V4.Net).To(gomega.Equal(ip1V4Sub)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache[node1.Name].egressIPConfig.V6.Net).To(gomega.Equal(ip1V6Sub)) expectedDatabaseState := []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"reroute-UUID", "no-reroute-service-UUID"}, - }, &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "reroute-UUID", + UUID: "no-reroute-UUID", }, &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, @@ -3460,6 +4978,11 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Action: nbdb.LogicalRouterPolicyActionAllow, UUID: "no-reroute-service-UUID", }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", 
"no-reroute-service-UUID"}, + }, &nbdb.LogicalRouter{ Name: ovntypes.GWRouterPrefix + node1.Name, UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", @@ -3490,115 +5013,20 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, } gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("using retry to create egress node with forced error followed by an update", func() { - app.Action = func(ctx *cli.Context) error { - nodeIPv4 := "192.168.126.51/24" - nodeIPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" - node := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node", - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", nodeIPv4, nodeIPv6), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, - } - fakeOvn.startWithDBSetup(libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node.Name, - UUID: ovntypes.GWRouterPrefix + node.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + nodeName + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + nodeName, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + nodeName, - }, - }, - }, - }) - err := fakeOvn.controller.WatchEgressNodes() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(0)) - _, ipV4Sub, err := net.ParseCIDR(nodeIPv4) - _, ipV6Sub, err := net.ParseCIDR(nodeIPv6) - node.Labels = map[string]string{ + node2.Labels = map[string]string{ "k8s.ovn.org/egress-assignable": "", } - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ginkgo.By("Bringing down NBDB") - // inject transient problem, nbdb is down - fakeOvn.controller.nbClient.Close() - gomega.Eventually(func() bool { - return fakeOvn.controller.nbClient.Connected() - }).Should(gomega.BeFalse()) - - // sleep long enough for TransactWithRetry to fail, causing egressnode operations to fail - // there is a chance that both egressnode events(node1 removal and node2 update) will end up in the same event queue - // sleep for double the time to allow for two consecutive TransactWithRetry timeouts - time.Sleep(2 * (types.OVSDBTimeout + time.Second)) - // check to see if the retry cache has an entry - key, err := retry.GetResourceKey(&node) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - retry.CheckRetryObjectEventually(key, true, fakeOvn.controller.retryEgressNodes) - ginkgo.By("retry entry: old obj should be nil, new obj should not be nil") - retry.CheckRetryObjectMultipleFieldsEventually( - key, - fakeOvn.controller.retryEgressNodes, - gomega.BeNil(), // oldObj should be nil - gomega.Not(gomega.BeNil()), // newObj should not be nil - ) - - node.Labels = map[string]string{} - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node, metav1.UpdateOptions{}) + _, 
err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - connCtx, cancel := context.WithTimeout(context.Background(), types.OVSDBTimeout) - defer cancel() - resetNBClient(connCtx, fakeOvn.controller.nbClient) - retry.SetRetryObjWithNoBackoff(key, fakeOvn.controller.retryEgressNodes) - fakeOvn.controller.retryEgressNodes.RequestRetryObjs() - // check the cache no longer has the entry - retry.CheckRetryObjectEventually(key, false, fakeOvn.controller.retryEgressNodes) - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache[node.Name].egressIPConfig.V4.Net).To(gomega.Equal(ipV4Sub)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache[node.Name].egressIPConfig.V6.Net).To(gomega.Equal(ipV6Sub)) - expectedDatabaseState := []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"reroute-UUID", "no-reroute-service-UUID"}, - }, + // note: since there are no egressIP pods created in this test, we didn't need to manually patch the status. + expectedDatabaseState = []libovsdbtest.TestData{ &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "reroute-UUID", + UUID: "no-reroute-UUID", }, &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, @@ -3607,15 +5035,36 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { UUID: "no-reroute-service-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node.Name, - UUID: ovntypes.GWRouterPrefix + node.Name + "-UUID", + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", }, &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + nodeName + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + nodeName, + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, Type: "router", Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + nodeName, + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", }, }, } @@ -3627,39 +5076,29 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - ginkgo.It("egressIP pod recreate with same name (stateful-sets) shouldn't use stale logicalPortCache 
entries", func() { + ginkgo.It("should remove stale EgressIP setup when node label is removed while ovnkube-master is not running and assign to newly labelled node", func() { app.Action = func(ctx *cli.Context) error { - config.Gateway.DisableSNATMultipleGWs = true - - egressIP1 := "192.168.126.101" - node1IPv4 := "192.168.126.12/24" + egressIP1 := "192.168.126.25" + node1IPv4 := "192.168.126.51/24" - egressPod1 := *newPodWithLabels(namespace, podName, node1Name, "", egressPodLabel) + egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) egressNamespace := newNamespace(namespace) + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + } + labels := map[string]string{} + node1 := getNodeObj(node1Name, annotations, labels) + annotations = map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + } - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, - "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", - }, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, + labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", } + node2 := getNodeObj(node2Name, annotations, labels) eIP := egressipv1.EgressIP{ ObjectMeta: newEgressIPMeta(egressIPName), @@ -3683,27 +5122,31 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, }, } - nodeSwitch := &nbdb.LogicalSwitch{ - UUID: node1.Name + "-UUID", - Name: node1.Name, - } fakeOvn.startWithDBSetup( libovsdbtest.TestSetup{ NBData: []libovsdbtest.TestData{ &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", + Name: types.OVNClusterRouter, }, &nbdb.LogicalRouter{ Name: ovntypes.GWRouterPrefix + node1.Name, UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + }, &nbdb.LogicalRouterPort{ UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, Networks: []string{nodeLogicalRouterIfAddrV4}, }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, &nbdb.LogicalSwitchPort{ UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, @@ -3712,29 +5155,35 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { "router-port": 
types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, }, }, - nodeSwitch, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, + }, }, }, &egressipv1.EgressIPList{ Items: []egressipv1.EgressIP{eIP}, }, &v1.NodeList{ - Items: []v1.Node{node1}, + Items: []v1.Node{node1, node2}, }, &v1.NamespaceList{ Items: []v1.Namespace{*egressNamespace}, }, &v1.PodList{ - Items: []v1.Pod{egressPod1}, + Items: []v1.Pod{egressPod}, }, ) - // we don't know the real switch UUID in the db, but it can be found by name - swUUID := getLogicalSwitchUUID(fakeOvn.controller.nbClient, node1.Name) - fakeOvn.controller.lsManager.AddSwitch(node1.Name, swUUID, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) - err := fakeOvn.controller.WatchPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPNamespaces() + i, n, _ := net.ParseCIDR(podV4IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + + err := fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -3743,20 +5192,17 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - egressPodPortInfo, err := fakeOvn.controller.logicalPortCache.get(&egressPod1, types.DefaultNetworkName) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - egressPodIP, _, err := net.ParseCIDR(egressPodPortInfo.ips[0].String()) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Expect(egressPodPortInfo.expires.IsZero()).To(gomega.BeTrue()) - podAddr := fmt.Sprintf("%s %s", egressPodPortInfo.mac.String(), egressPodIP) + fakeOvn.patchEgressIPObj(node2Name, egressIP1) - expectedNatLogicalPort1 := "k8s-node1" - expectedDatabaseStatewithPod := []libovsdbtest.TestData{ + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) + expectedNatLogicalPort := "k8s-node2" + expectedDatabaseState := []libovsdbtest.TestData{ &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", + UUID: "default-no-reroute-UUID", }, &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, @@ -3766,103 +5212,50 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, &nbdb.LogicalRouterPolicy{ Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPodIP), + Match: fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP), Action: nbdb.LogicalRouterPolicyActionReroute, Nexthops: nodeLogicalRouterIPv4, ExternalIDs: map[string]string{ "name": eIP.Name, }, - UUID: "reroute-UUID1", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Nat: 
[]string{"egressip-nat-UUID1"}, + UUID: "reroute-UUID", }, &nbdb.NAT{ - UUID: "egressip-nat-UUID1", - LogicalIP: egressPodIP.String(), + UUID: "egressip-nat-UUID", + LogicalIP: podV4IP, ExternalIP: egressIP1, ExternalIDs: map[string]string{ "name": egressIPName, }, Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort1, + LogicalPort: &expectedNatLogicalPort, Options: map[string]string{ "stateless": "false", }, }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{"100.64.0.2/29"}, - }, - nodeSwitch, - } - podLSP := &nbdb.LogicalSwitchPort{ - UUID: util.GetLogicalPortName(egressPod1.Namespace, egressPod1.Name) + "-UUID", - Name: util.GetLogicalPortName(egressPod1.Namespace, egressPod1.Name), - Addresses: []string{podAddr}, - ExternalIDs: map[string]string{ - "pod": "true", - "namespace": egressPod1.Namespace, - }, - Options: map[string]string{ - "requested-chassis": egressPod1.Spec.NodeName, - "iface-id-ver": egressPod1.Name, - }, - PortSecurity: []string{podAddr}, - } - nodeSwitch.Ports = []string{podLSP.UUID} - finalDatabaseStatewithPod := append(expectedDatabaseStatewithPod, podLSP) - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - _, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) - - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod)) - - // delete the pod - err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod1.Namespace).Delete(context.TODO(), - egressPod1.Name, metav1.DeleteOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - expectedDatabaseStateWithoutPod := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, + Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"}, }, &nbdb.LogicalRouter{ Name: ovntypes.GWRouterPrefix + node1.Name, UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Nat: []string{}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Nat: []string{"egressip-nat-UUID"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, 
+ Networks: []string{nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, }, &nbdb.LogicalSwitchPort{ UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", @@ -3874,33 +5267,19 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { "exclude-lb-vips-from-garp": "true", }, }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{"100.64.0.2/29"}, - }, - &nbdb.LogicalSwitch{ - UUID: node1.Name + "-UUID", - Name: node1.Name, - Ports: []string{}, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, }, } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStateWithoutPod)) - // recreate pod with same name immediately; simulating handler race (pods v/s egressip) condition, - // so instead of proper pod create, we try out egressIP pod setup which will be a no-op since pod doesn't exist - ginkgo.By("should not add egress IP setup for a deleted pod whose entry exists in logicalPortCache") - err = fakeOvn.controller.addPodEgressIPAssignments(egressIPName, eIP.Status.Items, &egressPod1) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // pod is gone but logicalPortCache holds the entry for 60seconds - egressPodPortInfo, err = fakeOvn.controller.logicalPortCache.get(&egressPod1, types.DefaultNetworkName) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Expect(egressPodPortInfo.expires.IsZero()).To(gomega.BeFalse()) - staleEgressPodIP, _, err := net.ParseCIDR(egressPodPortInfo.ips[0].String()) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Expect(staleEgressPodIP).To(gomega.Equal(egressPodIP)) - // no-op - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStateWithoutPod)) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) return nil } @@ -3908,42 +5287,22 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - ginkgo.It("egressIP pod recreate with same name (stateful-sets) shouldn't use stale logicalPortCache entries AND stale podAssignment cache entries", func() { + ginkgo.It("should remove stale EgressIP setup when pod is deleted while ovnkube-master is not running", func() { app.Action = func(ctx *cli.Context) error { - config.Gateway.DisableSNATMultipleGWs = true - - egressIP1 := "192.168.126.101" - node1IPv4 := "192.168.126.12/24" + egressIP1 := "192.168.126.25" + node1IPv4 := "192.168.126.51/24" - oldEgressPodIP := "10.128.0.50" - egressPod1 := newPodWithLabels(namespace, podName, node1Name, "", egressPodLabel) - oldAnnotation := map[string]string{"k8s.ovn.org/pod-networks": 
`{"default":{"ip_addresses":["10.128.0.50/24"],"mac_address":"0a:58:0a:80:00:05","gateway_ips":["10.128.0.1"],"routes":[{"dest":"10.128.0.0/24","nextHop":"10.128.0.1"}],"ip_address":"10.128.0.50/24","gateway_ip":"10.128.0.1"}}`} - egressPod1.Annotations = oldAnnotation egressNamespace := newNamespace(namespace) - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, - "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", - }, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), } + labels := map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + node1 := getNodeObj(node1Name, annotations, labels) eIP := egressipv1.EgressIP{ ObjectMeta: newEgressIPMeta(egressIPName), @@ -3967,26 +5326,48 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, }, } - nodeSwitch := &nbdb.LogicalSwitch{ - UUID: node1.Name + "-UUID", - Name: node1.Name, - } + expectedNatLogicalPort := "k8s-node1" fakeOvn.startWithDBSetup( libovsdbtest.TestSetup{ NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + UUID: "keep-me-UUID", + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Priority: types.DefaultNoRereoutePriority, + Action: nbdb.LogicalRouterPolicyActionAllow, + }, + &nbdb.LogicalRouterPolicy{ + UUID: "remove-me-UUID", + ExternalIDs: map[string]string{ + "name": eIP.Name, + }, + Match: "ip.src == 10.128.3.8", + Priority: types.EgressIPReroutePriority, + Action: nbdb.LogicalRouterPolicyActionReroute, + }, &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"remove-me-UUID", "keep-me-UUID"}, }, &nbdb.LogicalRouter{ Name: ovntypes.GWRouterPrefix + node1.Name, UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Nat: []string{"egressip-nat-UUID"}, }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, + &nbdb.NAT{ + UUID: "egressip-nat-UUID", + LogicalIP: podV4IP, + ExternalIP: egressIP1, + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort, + Options: map[string]string{ + "stateless": "false", + }, }, &nbdb.LogicalSwitchPort{ UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", @@ -3996,7 +5377,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, }, }, - nodeSwitch, }, }, &egressipv1.EgressIPList{ @@ -4008,64 +5388,24 @@ var _ = ginkgo.Describe("OVN master EgressIP 
Operations", func() { &v1.NamespaceList{ Items: []v1.Namespace{*egressNamespace}, }, - &v1.PodList{ - Items: []v1.Pod{*egressPod1}, - }, ) - // we don't know the real switch UUID in the db, but it can be found by name - swUUID := getLogicalSwitchUUID(fakeOvn.controller.nbClient, node1.Name) - fakeOvn.controller.lsManager.AddSwitch(node1.Name, swUUID, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) - fakeOvn.controller.WatchPods() - fakeOvn.controller.WatchEgressIPNamespaces() - fakeOvn.controller.WatchEgressIPPods() - fakeOvn.controller.WatchEgressNodes() - fakeOvn.controller.WatchEgressIP() - - oldEgressPodPortInfo, err := fakeOvn.controller.logicalPortCache.get(egressPod1, types.DefaultNetworkName) + err := fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - egressPodIP, _, err := net.ParseCIDR(oldEgressPodPortInfo.ips[0].String()) + err = fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Expect(egressPodIP.String()).To(gomega.Equal(oldEgressPodIP)) - gomega.Expect(oldEgressPodPortInfo.expires.IsZero()).To(gomega.BeTrue()) - podAddr := fmt.Sprintf("%s %s", oldEgressPodPortInfo.mac.String(), egressPodIP) - - expectedNatLogicalPort1 := "k8s-node1" - podEIPSNAT := &nbdb.NAT{ - UUID: "egressip-nat-UUID1", - LogicalIP: egressPodIP.String(), - ExternalIP: egressIP1, - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort1, - Options: map[string]string{ - "stateless": "false", - }, - } - podReRoutePolicy := &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", oldEgressPodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv4, - ExternalIDs: map[string]string{ - "name": eIP.Name, - }, - UUID: "reroute-UUID1", - } - node1GR := &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Nat: []string{"egressip-nat-UUID1"}, - } - expectedDatabaseStatewithPod := []libovsdbtest.TestData{ - podEIPSNAT, + err = fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) + expectedDatabaseState := []libovsdbtest.TestData{ &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, + UUID: "keep-me-UUID", Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Priority: types.DefaultNoRereoutePriority, Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", }, &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, @@ -4073,13 +5413,16 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Action: nbdb.LogicalRouterPolicyActionAllow, UUID: "no-reroute-service-UUID", }, - podReRoutePolicy, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1"}, + Policies: []string{"keep-me-UUID", "no-reroute-service-UUID"}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Nat: []string{"egressip-nat-UUID"}, }, - node1GR, &nbdb.LogicalSwitchPort{ UUID: 
types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, @@ -4090,132 +5433,8 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { "exclude-lb-vips-from-garp": "true", }, }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{"100.64.0.2/29"}, - }, - nodeSwitch, - } - podLSP := &nbdb.LogicalSwitchPort{ - UUID: util.GetLogicalPortName(egressPod1.Namespace, egressPod1.Name) + "-UUID", - Name: util.GetLogicalPortName(egressPod1.Namespace, egressPod1.Name), - Addresses: []string{podAddr}, - ExternalIDs: map[string]string{ - "pod": "true", - "namespace": egressPod1.Namespace, - }, - Options: map[string]string{ - "requested-chassis": egressPod1.Spec.NodeName, - "iface-id-ver": egressPod1.Name, - }, - PortSecurity: []string{podAddr}, - } - nodeSwitch.Ports = []string{podLSP.UUID} - finalDatabaseStatewithPod := append(expectedDatabaseStatewithPod, podLSP) - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - _, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) - - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod)) - - // delete the pod and simulate a cleanup failure: - // 1) create a situation where pod is gone from kapi but egressIP setup wasn't cleanedup due to deletion error - // - we remove annotation from pod to mimic this situation - // 2) leaves us with a stale podAssignment cache - // 3) check to make sure the logicalPortCache is used always even if podAssignment already has the podKey - ginkgo.By("delete the egress IP pod and force the deletion to fail") - egressPod1.Annotations = map[string]string{} - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod1.Namespace).Update(context.TODO(), egressPod1, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // Wait for the cleared annotations to show up client-side - gomega.Eventually(func() int { - egressPod1, _ = fakeOvn.watcher.GetPod(egressPod1.Namespace, egressPod1.Name) - return len(egressPod1.Annotations) - }, 5).Should(gomega.Equal(0)) - - // Delete the pod to trigger the cleanup failure - err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod1.Namespace).Delete(context.TODO(), - egressPod1.Name, metav1.DeleteOptions{}) - // internally we have an error: - // E1006 12:51:59.594899 2500972 obj_retry.go:1517] Failed to delete *factory.egressIPPod egressip-namespace/egress-pod, error: pod egressip-namespace/egress-pod: no pod IPs found - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // notice that pod objects aren't cleaned up yet since deletion failed! 
- // even the LSP sticks around for 60 seconds - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod)) - // egressIP cache is stale in the sense the podKey has not been deleted since deletion failed - pas := getPodAssignmentState(egressPod1) - gomega.Expect(pas).NotTo(gomega.BeNil()) - gomega.Expect(pas.egressStatuses).To(gomega.Equal(map[egressipv1.EgressIPStatusItem]string{ - { - Node: "node1", - EgressIP: "192.168.126.101", - }: "", - })) - // recreate pod with same name immediately; - ginkgo.By("should add egress IP setup for the NEW pod which exists in logicalPortCache") - newEgressPodIP := "10.128.0.60" - egressPod1 = newPodWithLabels(namespace, podName, node1Name, newEgressPodIP, egressPodLabel) - egressPod1.Annotations = map[string]string{"k8s.ovn.org/pod-networks": `{"default":{"ip_addresses":["10.128.0.60/24"],"mac_address":"0a:58:0a:80:00:06","gateway_ips":["10.128.0.1"],"routes":[{"dest":"10.128.0.0/24","nextHop":"10.128.0.1"}],"ip_address":"10.128.0.60/24","gateway_ip":"10.128.0.1"}}`} - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod1.Namespace).Create(context.TODO(), egressPod1, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - // wait for the logical port cache to get updated with the new pod's IP - var newEgressPodPortInfo *lpInfo - getEgressPodIP := func() string { - newEgressPodPortInfo, err = fakeOvn.controller.logicalPortCache.get(egressPod1, types.DefaultNetworkName) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - egressPodIP, _, err := net.ParseCIDR(newEgressPodPortInfo.ips[0].String()) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - return egressPodIP.String() - } - gomega.Eventually(func() string { - return getEgressPodIP() - }).Should(gomega.Equal(newEgressPodIP)) - gomega.Expect(newEgressPodPortInfo.expires.IsZero()).To(gomega.BeTrue()) - - // deletion for the older EIP pod object is still being retried so we still have SNAT - // towards nodeIP for new pod which is created by addLogicalPort. - // Note that we while have the stale re-route policy for old pod, the snat for the old pod towards egressIP is gone - // because deleteLogicalPort removes ALL snats for a given pod but doesn't remove the policies. 
- ipv4Addr, _, _ := net.ParseCIDR(node1IPv4) - podNodeSNAT := &nbdb.NAT{ - UUID: "node-nat-UUID1", - LogicalIP: newEgressPodIP, - ExternalIP: ipv4Addr.String(), - Type: nbdb.NATTypeSNAT, - Options: map[string]string{ - "stateless": "false", - }, } - finalDatabaseStatewithPod = append(finalDatabaseStatewithPod, podNodeSNAT) - node1GR.Nat = []string{podNodeSNAT.UUID} - podAddr = fmt.Sprintf("%s %s", newEgressPodPortInfo.mac.String(), newEgressPodIP) - podLSP.PortSecurity = []string{podAddr} - podLSP.Addresses = []string{podAddr} - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod[1:])) - - ginkgo.By("trigger a forced retry and ensure deletion of oldPod and creation of newPod are successful") - // let us add back the annotation to the oldPod which is being retried to make deletion a success - podKey, err := retry.GetResourceKey(egressPod1) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - retry.CheckRetryObjectEventually(podKey, true, fakeOvn.controller.retryEgressIPPods) - retryOldObj := retry.GetOldObjFromRetryObj(podKey, fakeOvn.controller.retryEgressIPPods) - //fakeOvn.controller.retryEgressIPPods.retryEntries.LoadOrStore(podKey, &RetryObjEntry{backoffSec: 1}) - pod, _ := retryOldObj.(*kapi.Pod) - pod.Annotations = oldAnnotation - fakeOvn.controller.retryEgressIPPods.RequestRetryObjs() - // there should also be no entry for this pod in the retry cache - gomega.Eventually(func() bool { - return retry.CheckRetryObj(podKey, fakeOvn.controller.retryEgressIPPods) - }, retry.RetryObjInterval+time.Second).Should(gomega.BeFalse()) - - // ensure that egressIP setup is being done with the new pod's information from logicalPortCache - podReRoutePolicy.Match = fmt.Sprintf("ip4.src == %s", newEgressPodIP) - podEIPSNAT.LogicalIP = newEgressPodIP - node1GR.Nat = []string{podEIPSNAT.UUID} - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod[:len(finalDatabaseStatewithPod)-1])) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) return nil } @@ -4223,85 +5442,30 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - ginkgo.It("egressIP pod managed by multiple objects, verify standby works wells, verify syncPodAssignmentCache on restarts", func() { + ginkgo.It("should remove stale pod SNAT referring to wrong logical port after ovnkube-master is started", func() { app.Action = func(ctx *cli.Context) error { - config.Gateway.DisableSNATMultipleGWs = true - - egressIP1 := "192.168.126.25" - egressIP2 := "192.168.126.30" - egressIP3 := "192.168.126.35" + egressIP := "192.168.126.25" node1IPv4 := "192.168.126.12/24" - node2IPv4 := "192.168.126.13/24" - egressPod1 := *newPodWithLabels(namespace, podName, node1Name, "", egressPodLabel) + egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) egressNamespace := newNamespace(namespace) - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, - "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", - }, - Labels: map[string]string{ - 
"k8s.ovn.org/egress-assignable": "", - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, + "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", } - - node2 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node2Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node2IPv4), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:50", "ip-address":"192.168.126.13/24", "next-hop":"192.168.126.1"}}`, - "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, + labels := map[string]string{ + "k8s.ovn.org/egress-assignable": "", } + node1 := getNodeObj(node1Name, annotations, labels) - eIP1 := egressipv1.EgressIP{ + eIP := egressipv1.EgressIP{ ObjectMeta: newEgressIPMeta(egressIPName), Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP1, egressIP2}, - PodSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, - }, - NamespaceSelector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - "name": egressNamespace.Name, - }, - }, - }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{}, - }, - } - - eIP2 := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName2), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP3}, + EgressIPs: []string{egressIP}, PodSelector: metav1.LabelSelector{ MatchLabels: egressPodLabel, }, @@ -4324,10 +5488,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Name: ovntypes.GWRouterPrefix + node1.Name, UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", } - node2GR := &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - } node1LSP := &nbdb.LogicalSwitchPort{ UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, @@ -4336,15 +5496,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, }, } - node2LSP := &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - } - fakeOvn.startWithDBSetup( libovsdbtest.TestSetup{ NBData: []libovsdbtest.TestData{ @@ -4352,44 +5503,49 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", }, - node1GR, node2GR, - node1LSP, node2LSP, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: 
ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{"100.64.0.3/29"}, - }, + node1GR, + node1LSP, &nbdb.LogicalRouterPort{ UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, Networks: []string{"100.64.0.2/29"}, }, node1Switch, - &nbdb.LogicalSwitch{ - UUID: node2.Name + "-UUID", - Name: node2.Name, + // This is unexpected snat entry where its logical port refers to an unavailable node + // and ensure this entry is removed as soon as ovnk master is up and running. + &nbdb.NAT{ + UUID: "egressip-nat-UUID2", + LogicalIP: podV4IP, + ExternalIP: egressIP, + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: utilpointer.String("k8s-node2"), + Options: map[string]string{ + "stateless": "false", + }, }, }, }, &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP1, eIP2}, + Items: []egressipv1.EgressIP{eIP}, }, &v1.NodeList{ - Items: []v1.Node{node1, node2}, + Items: []v1.Node{node1}, }, &v1.NamespaceList{ Items: []v1.Namespace{*egressNamespace}, }, &v1.PodList{ - Items: []v1.Pod{egressPod1}, + Items: []v1.Pod{egressPod}, }, ) - // we don't know the real switch UUID in the db, but it can be found by name - sw1UUID := getLogicalSwitchUUID(fakeOvn.controller.nbClient, node1.Name) - sw2UUID := getLogicalSwitchUUID(fakeOvn.controller.nbClient, node2.Name) - fakeOvn.controller.lsManager.AddSwitch(node1.Name, sw1UUID, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) - fakeOvn.controller.lsManager.AddSwitch(node2.Name, sw2UUID, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) + i, n, _ := net.ParseCIDR(podV4IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + err := fakeOvn.controller.WatchPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIPNamespaces() @@ -4401,9 +5557,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - egressPodPortInfo, err := fakeOvn.controller.logicalPortCache.get(&egressPod1, types.DefaultNetworkName) + egressPodPortInfo, err := fakeOvn.controller.logicalPortCache.get(&egressPod, types.DefaultNetworkName) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ePod, err := fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod1.Namespace).Get(context.TODO(), egressPod1.Name, metav1.GetOptions{}) + ePod, err := fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Get(context.TODO(), egressPod.Name, metav1.GetOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) egressPodIP, err := util.GetPodIPsOfNetwork(ePod, &util.DefaultNetInfo{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -4411,324 +5567,45 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) gomega.Expect(egressNetPodIP.String()).To(gomega.Equal(egressPodIP[0].String())) gomega.Expect(egressPodPortInfo.expires.IsZero()).To(gomega.BeTrue()) - podAddr := fmt.Sprintf("%s %s", egressPodPortInfo.mac.String(), egressPodIP[0].String()) - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) - // Ensure first egressIP object is assigned, since only node1 is an egressNode, only 1IP will be assigned, other will be pending - 
gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) + fakeOvn.patchEgressIPObj(node1Name, egressIP) gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(1)) - recordedEvent := <-fakeOvn.fakeRecorder.Events - gomega.Expect(recordedEvent).To(gomega.ContainSubstring("Not all egress IPs for EgressIP: %s could be assigned, please tag more nodes", eIP1.Name)) - egressIPs1, nodes1 := getEgressIPStatus(egressIPName) - gomega.Expect(nodes1[0]).To(gomega.Equal(node1.Name)) - possibleAssignments := sets.NewString(egressIP1, egressIP2) - gomega.Expect(possibleAssignments.Has(egressIPs1[0])).To(gomega.BeTrue()) - - // Ensure second egressIP object is also assigned to node1, but no OVN config will be done for this - gomega.Eventually(getEgressIPStatusLen(egressIPName2)).Should(gomega.Equal(1)) - egressIPs2, nodes2 := getEgressIPStatus(egressIPName2) - gomega.Expect(nodes2[0]).To(gomega.Equal(node1.Name)) - gomega.Expect(egressIPs2[0]).To(gomega.Equal(egressIP3)) - recordedEvent = <-fakeOvn.fakeRecorder.Events - gomega.Expect(recordedEvent).To(gomega.ContainSubstring("EgressIP object egressip-2 will not be configured for pod egressip-namespace_egress-pod since another egressIP object egressip is serving it, this is undefined")) - - pas := getPodAssignmentState(&egressPod1) - gomega.Expect(pas).NotTo(gomega.BeNil()) - - assginedEIP := egressIPs1[0] - gomega.Expect(pas.egressIPName).To(gomega.Equal(egressIPName)) - eip1Obj, err := fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP1.Name, metav1.GetOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Expect(pas.egressStatuses[eip1Obj.Status.Items[0]]).To(gomega.Equal("")) - gomega.Expect(pas.standbyEgressIPNames.Has(egressIPName2)).To(gomega.BeTrue()) + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) - podEIPSNAT := &nbdb.NAT{ - UUID: "egressip-nat-UUID1", - LogicalIP: egressPodIP[0].String(), - ExternalIP: assginedEIP, - ExternalIDs: map[string]string{ - "name": pas.egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: utilpointer.StringPtr("k8s-node1"), - Options: map[string]string{ - "stateless": "false", - }, - } - podReRoutePolicy := &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPodIP[0].String()), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv4, - ExternalIDs: map[string]string{ - "name": pas.egressIPName, - }, - UUID: "reroute-UUID1", - } - node1GR.Nat = []string{"egressip-nat-UUID1"} + podEIPSNAT := getEIPSNAT(podV4IP, egressIP, "k8s-node1") + podReRoutePolicy := getReRoutePolicy(egressPodIP[0].String(), "4", nodeLogicalRouterIPv4) + node1GR.Nat = []string{"egressip-nat-UUID"} node1LSP.Options = map[string]string{ "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, "nat-addresses": "router", "exclude-lb-vips-from-garp": "true", } expectedDatabaseStatewithPod := []libovsdbtest.TestData{ - podEIPSNAT, - &nbdb.LogicalRouterPolicy{ + podEIPSNAT, &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", Action: nbdb.LogicalRouterPolicyActionAllow, UUID: "no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ + }, 
&nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), Action: nbdb.LogicalRouterPolicyActionAllow, UUID: "no-reroute-service-UUID", - }, - podReRoutePolicy, - &nbdb.LogicalRouter{ + }, podReRoutePolicy, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1"}, - }, - node1GR, node2GR, - node1LSP, node2LSP, + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID"}, + }, node1GR, node1LSP, &nbdb.LogicalRouterPort{ UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, Networks: []string{"100.64.0.2/29"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{"100.64.0.3/29"}, - }, - node1Switch, - &nbdb.LogicalSwitch{ - UUID: node2.Name + "-UUID", - Name: node2.Name, - }, - } - podLSP := &nbdb.LogicalSwitchPort{ - UUID: util.GetLogicalPortName(egressPod1.Namespace, egressPod1.Name) + "-UUID", - Name: util.GetLogicalPortName(egressPod1.Namespace, egressPod1.Name), - Addresses: []string{podAddr}, - ExternalIDs: map[string]string{ - "pod": "true", - "namespace": egressPod1.Namespace, - }, - Options: map[string]string{ - "requested-chassis": egressPod1.Spec.NodeName, - "iface-id-ver": egressPod1.Name, - }, - PortSecurity: []string{podAddr}, - } - node1Switch.Ports = []string{podLSP.UUID} - finalDatabaseStatewithPod := append(expectedDatabaseStatewithPod, podLSP) - - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod)) - - // Make second node egressIP assignable - node2.Labels = map[string]string{ - "k8s.ovn.org/egress-assignable": "", - } - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - // ensure secondIP from first object gets assigned to node2 - gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeTrue()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(2)) - egressIPs1, nodes1 = getEgressIPStatus(egressIPName) - gomega.Expect(nodes1[1]).To(gomega.Equal(node2.Name)) - gomega.Expect(possibleAssignments.Has(egressIPs1[1])).To(gomega.BeTrue()) - - podEIPSNAT2 := &nbdb.NAT{ - UUID: "egressip-nat-UUID2", - LogicalIP: egressPodIP[0].String(), - ExternalIP: egressIPs1[1], - ExternalIDs: map[string]string{ - "name": pas.egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: utilpointer.StringPtr("k8s-node2"), - Options: map[string]string{ - "stateless": "false", - }, - } - podReRoutePolicy.Nexthops = []string{nodeLogicalRouterIPv4[0], node2LogicalRouterIPv4[0]} - node2GR.Nat = []string{"egressip-nat-UUID2"} - node2LSP.Options = map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - } - finalDatabaseStatewithPod = append(finalDatabaseStatewithPod, podEIPSNAT2) - - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod)) - - // check the state of the cache for podKey - pas = getPodAssignmentState(&egressPod1) - 
gomega.Expect(pas).NotTo(gomega.BeNil()) - - gomega.Expect(pas.egressIPName).To(gomega.Equal(egressIPName)) - eip1Obj, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP1.Name, metav1.GetOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Expect(pas.egressStatuses[eip1Obj.Status.Items[0]]).To(gomega.Equal("")) - gomega.Expect(pas.egressStatuses[eip1Obj.Status.Items[1]]).To(gomega.Equal("")) - gomega.Expect(pas.standbyEgressIPNames.Has(egressIPName2)).To(gomega.BeTrue()) - - // let's test syncPodAssignmentCache works as expected! Nuke the podAssignment cache first - fakeOvn.controller.eIPC.podAssignmentMutex.Lock() - fakeOvn.controller.eIPC.podAssignment = make(map[string]*podAssignmentState) // replicates controller startup state - fakeOvn.controller.eIPC.podAssignmentMutex.Unlock() - - egressIPCache, err := fakeOvn.controller.generateCacheForEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.syncPodAssignmentCache(egressIPCache) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - pas = getPodAssignmentState(&egressPod1) - gomega.Expect(pas).NotTo(gomega.BeNil()) - gomega.Expect(pas.egressIPName).To(gomega.Equal(egressIPName)) - gomega.Expect(pas.egressStatuses).To(gomega.Equal(map[egressipv1.EgressIPStatusItem]string{})) - gomega.Expect(pas.standbyEgressIPNames.Has(egressIPName2)).To(gomega.BeTrue()) - - // reset egressStatuses for rest of the test to progress correctly - fakeOvn.controller.eIPC.podAssignmentMutex.Lock() - fakeOvn.controller.eIPC.podAssignment[getPodKey(&egressPod1)].egressStatuses[eip1Obj.Status.Items[0]] = "" - fakeOvn.controller.eIPC.podAssignment[getPodKey(&egressPod1)].egressStatuses[eip1Obj.Status.Items[1]] = "" - fakeOvn.controller.eIPC.podAssignmentMutex.Unlock() - - // delete the standby egressIP object to make sure the cache is updated - err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Delete(context.TODO(), egressIPName2, metav1.DeleteOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(func() bool { - pas := getPodAssignmentState(&egressPod1) - gomega.Expect(pas).NotTo(gomega.BeNil()) - return pas.standbyEgressIPNames.Has(egressIPName2) - }).Should(gomega.BeFalse()) - gomega.Expect(getPodAssignmentState(&egressPod1).egressIPName).To(gomega.Equal(egressIPName)) - - // add back the standby egressIP object - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP2, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(func() bool { - pas := getPodAssignmentState(&egressPod1) - gomega.Expect(pas).NotTo(gomega.BeNil()) - return pas.standbyEgressIPNames.Has(egressIPName2) - }).Should(gomega.BeTrue()) - gomega.Expect(getPodAssignmentState(&egressPod1).egressIPName).To(gomega.Equal(egressIPName)) - gomega.Eventually(func() string { - return <-fakeOvn.fakeRecorder.Events - }).Should(gomega.ContainSubstring("EgressIP object egressip-2 will not be configured for pod egressip-namespace_egress-pod since another egressIP object egressip is serving it, this is undefined")) - - gomega.Eventually(getEgressIPStatusLen(egressIPName2)).Should(gomega.Equal(1)) - egressIPs2, nodes2 = getEgressIPStatus(egressIPName2) - gomega.Expect(egressIPs2[0]).To(gomega.Equal(egressIP3)) - assginedNodeForEIPObj2 := nodes2[0] - - // Delete the IP from object1 that was on node1 and ensure standby is not taking over - eIPUpdate, err := 
fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP1.Name, metav1.GetOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - ipOnNode1 := assginedEIP - var ipOnNode2 string - if ipOnNode1 == egressIP1 { - ipOnNode2 = egressIP2 - } else { - ipOnNode2 = egressIP1 - } - eIPUpdate.Spec.EgressIPs = []string{ipOnNode2} - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Update(context.TODO(), eIPUpdate, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - egressIPs1, nodes1 = getEgressIPStatus(egressIPName) - gomega.Expect(nodes1[0]).To(gomega.Equal(node2.Name)) - gomega.Expect(egressIPs1[0]).To(gomega.Equal(ipOnNode2)) - - // check if the setup for firstIP from object1 is deleted properly - podReRoutePolicy.Nexthops = node2LogicalRouterIPv4 - podNodeSNAT := &nbdb.NAT{ - UUID: "node-nat-UUID1", - LogicalIP: egressPodIP[0].String(), - ExternalIP: "192.168.126.12", // adds back SNAT to nodeIP - Type: nbdb.NATTypeSNAT, - Options: map[string]string{ - "stateless": "false", - }, - } - node1GR.Nat = []string{podNodeSNAT.UUID} - finalDatabaseStatewithPod = append(finalDatabaseStatewithPod, podNodeSNAT) - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod[1:])) - - gomega.Eventually(func() bool { - pas := getPodAssignmentState(&egressPod1) - gomega.Expect(pas).NotTo(gomega.BeNil()) - return pas.standbyEgressIPNames.Has(egressIPName2) - }).Should(gomega.BeTrue()) - gomega.Expect(getPodAssignmentState(&egressPod1).egressIPName).To(gomega.Equal(egressIPName)) - - // delete the first egressIP object and make sure the cache is updated - err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Delete(context.TODO(), egressIPName, metav1.DeleteOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - // ensure standby takes over and we do the setup for it in OVN DB - gomega.Eventually(func() bool { - pas := getPodAssignmentState(&egressPod1) - gomega.Expect(pas).NotTo(gomega.BeNil()) - return pas.standbyEgressIPNames.Has(egressIPName2) - }).Should(gomega.BeFalse()) - gomega.Expect(getPodAssignmentState(&egressPod1).egressIPName).To(gomega.Equal(egressIPName2)) - - finalDatabaseStatewithPod = expectedDatabaseStatewithPod - finalDatabaseStatewithPod = append(expectedDatabaseStatewithPod, podLSP) - podEIPSNAT.ExternalIP = egressIP3 - podEIPSNAT.ExternalIDs = map[string]string{ - "name": egressIPName2, - } - podReRoutePolicy.ExternalIDs = map[string]string{ - "name": egressIPName2, - } - if assginedNodeForEIPObj2 == node2.Name { - podEIPSNAT.LogicalPort = utilpointer.StringPtr("k8s-node2") - finalDatabaseStatewithPod = append(finalDatabaseStatewithPod, podNodeSNAT) - node1GR.Nat = []string{podNodeSNAT.UUID} - node2GR.Nat = []string{podEIPSNAT.UUID} - } - if assginedNodeForEIPObj2 == node1.Name { - podReRoutePolicy.Nexthops = nodeLogicalRouterIPv4 - node1GR.Nat = []string{podEIPSNAT.UUID} - node2GR.Nat = []string{} - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod)) - - // delete the second egressIP object to make sure the cache is updated podKey should be gone since nothing is managing it anymore - err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Delete(context.TODO(), egressIPName2, metav1.DeleteOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(func() bool { - return getPodAssignmentState(&egressPod1) != nil - 
}).Should(gomega.BeFalse()) - - // let's test syncPodAssignmentCache works as expected! Nuke the podAssignment cache first - fakeOvn.controller.eIPC.podAssignmentMutex.Lock() - fakeOvn.controller.eIPC.podAssignment = make(map[string]*podAssignmentState) // replicates controller startup state - fakeOvn.controller.eIPC.podAssignmentMutex.Unlock() - - egressIPCache, err = fakeOvn.controller.generateCacheForEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.syncPodAssignmentCache(egressIPCache) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - // we don't have any egressIPs, so cache is nil - gomega.Eventually(func() bool { - return getPodAssignmentState(&egressPod1) != nil - }).Should(gomega.BeFalse()) + }, node1Switch} + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStatewithPod)) return nil } @@ -4736,28 +5613,35 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - ginkgo.It("should skip populating egress node data for nodes that have incorrect IP address", func() { + ginkgo.It("should only get assigned EgressIPs which matches their subnet when the node is tagged", func() { app.Action = func(ctx *cli.Context) error { - nodeIPv4 := "192.168.126.510/24" - nodeIPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" - node := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", nodeIPv4, nodeIPv6), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), - }, + egressIP := "192.168.126.101" + node1IPv4 := "192.168.128.202/24" + node1IPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" + node2IPv4 := "192.168.126.51/24" + + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, node1IPv6), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), + } + node1 := getNodeObj(node1Name, annotations, map[string]string{}) + annotations = map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + } + node2 := getNodeObj(node2Name, annotations, map[string]string{}) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, }, } + fakeOvn.startWithDBSetup( libovsdbtest.TestSetup{ NBData: []libovsdbtest.TestData{ @@ -4765,345 +5649,46 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: 
map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, + }, }, }, - &v1.NodeList{ - Items: []v1.Node{node}, + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, }, - ) - - allocatorItems := func() int { - return len(fakeOvn.controller.eIPC.allocator.cache) - } + &v1.NodeList{ + Items: []v1.Node{node1, node2}, + }) - err := fakeOvn.controller.WatchEgressNodes() + err := fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(allocatorItems).Should(gomega.Equal(0)) - - node.Labels = map[string]string{ - "k8s.ovn.org/egress-assignable": "", - } - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(allocatorItems).Should(gomega.Equal(0)) - - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should probe nodes using grpc", func() { - app.Action = func(ctx *cli.Context) error { - - node1IPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" - node2IPv4 := "192.168.126.51/24" - - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node1", - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", "", node1IPv6), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v6NodeSubnet), - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, - } - node2 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node2", - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, - } - fakeOvn.startWithDBSetup(libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": 
types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, - }, - }) - gomega.Expect(fakeOvn.controller.WatchEgressNodes()).To(gomega.Succeed()) - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(0)) - - _, ip1V6Sub, err := net.ParseCIDR(node1IPv6) - _, ip2V4Sub, err := net.ParseCIDR(node2IPv4) - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node1, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache[node1.Name].egressIPConfig.V6.Net).To(gomega.Equal(ip1V6Sub)) - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node2, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) - gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeTrue()) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) - - cachedEgressNode1 := fakeOvn.controller.eIPC.allocator.cache[node1.Name] - cachedEgressNode2 := fakeOvn.controller.eIPC.allocator.cache[node2.Name] - gomega.Expect(cachedEgressNode1.egressIPConfig.V6.Net).To(gomega.Equal(ip1V6Sub)) - gomega.Expect(cachedEgressNode2.egressIPConfig.V4.Net).To(gomega.Equal(ip2V4Sub)) - - // Explicitly call check reachibility so we need not to wait for slow periodic timer - checkEgressNodesReachabilityIterate(fakeOvn.controller) - gomega.Expect(cachedEgressNode1.isReachable).To(gomega.BeTrue()) - gomega.Expect(cachedEgressNode2.isReachable).To(gomega.BeTrue()) - - // The test cases below will manipulate the fakeEgressIPHealthClient used for mocking - // a gRPC session dedicated to monitoring each of the 2 nodes created. It does that - // by setting the probe fail boolean which in turn causes the mocked probe call to - // pretend that the periodic monitor succeeded or not. - tests := []struct { - desc string - node1FailProbes bool - node2FailProbes bool - // This function is an optional and generic function for the test case - // to allow any special pre-conditioning needed before invoking of - // checkEgressNodesReachabilityIterate in the test. 
- tcPrepareFunc func(hcc1, hcc2 *fakeEgressIPHealthClient) - }{ - { - desc: "disconnect nodes", - node1FailProbes: true, - node2FailProbes: true, - tcPrepareFunc: func(hcc1, hcc2 *fakeEgressIPHealthClient) { - hcc1.Disconnect() - hcc2.Disconnect() - }, - }, - { - desc: "connect node1", - node2FailProbes: true, - }, - { - desc: "node1 connected, connect node2", - }, - { - desc: "node1 and node2 connected, bump only node2 counters", - node1FailProbes: true, - }, - { - desc: "node2 connected, disconnect node1", - node1FailProbes: true, - node2FailProbes: true, - tcPrepareFunc: func(hcc1, hcc2 *fakeEgressIPHealthClient) { - hcc1.Disconnect() - }, - }, - { - desc: "connect node1, disconnect node2", - node2FailProbes: true, - tcPrepareFunc: func(hcc1, hcc2 *fakeEgressIPHealthClient) { - hcc2.Disconnect() - }, - }, - { - desc: "node1 and node2 connected and both counters bump", - tcPrepareFunc: func(hcc1, hcc2 *fakeEgressIPHealthClient) { - // Perform an additional iteration, to make probe counters to bump on second call - checkEgressNodesReachabilityIterate(fakeOvn.controller) - }, - }, - } - - // hcc1 and hcc2 are the mocked gRPC client to node1 and node2, respectively. - // They are what we use to manipulate whether probes to the node should fail or - // not, as well as a mechanism for explicitly disconnecting as part of the test. - hcc1 := cachedEgressNode1.healthClient.(*fakeEgressIPHealthClient) - hcc2 := cachedEgressNode2.healthClient.(*fakeEgressIPHealthClient) - - // ttIterCheck is the common function used by each test case. It will check whether - // a client changed its connection state and if the number of probes to the node - // changed as expected. - ttIterCheck := func(hcc *fakeEgressIPHealthClient, prevNodeIsConnected bool, prevProbes int, failProbes bool, desc string) { - currNodeIsConnected := hcc.IsConnected() - gomega.Expect(currNodeIsConnected || failProbes).To(gomega.BeTrue(), desc) - - if !prevNodeIsConnected && !currNodeIsConnected { - // Not connected (before and after): no probes should be successful - gomega.Expect(hcc.ProbeCount).To(gomega.Equal(prevProbes), desc) - } else if prevNodeIsConnected && currNodeIsConnected { - if failProbes { - // Still connected, but no probes should be successful - gomega.Expect(prevProbes).To(gomega.Equal(hcc.ProbeCount), desc) - } else { - // Still connected and probe counters should be going up - gomega.Expect(prevProbes < hcc.ProbeCount).To(gomega.BeTrue(), desc) - } - } - } - - for _, tt := range tests { - hcc1.FakeProbeFailure = tt.node1FailProbes - hcc2.FakeProbeFailure = tt.node2FailProbes - - prevNode1IsConnected := hcc1.IsConnected() - prevNode2IsConnected := hcc2.IsConnected() - prevNode1Probes := hcc1.ProbeCount - prevNode2Probes := hcc2.ProbeCount - - if tt.tcPrepareFunc != nil { - tt.tcPrepareFunc(hcc1, hcc2) - } - - // Perform connect or probing, depending on the state of the connections - checkEgressNodesReachabilityIterate(fakeOvn.controller) - - ttIterCheck(hcc1, prevNode1IsConnected, prevNode1Probes, tt.node1FailProbes, tt.desc) - ttIterCheck(hcc2, prevNode2IsConnected, prevNode2Probes, tt.node2FailProbes, tt.desc) - } - - gomega.Expect(hcc1.IsConnected()).To(gomega.BeTrue()) - gomega.Expect(hcc2.IsConnected()).To(gomega.BeTrue()) - - // Lastly, remove egress assignable from node 2 and make sure it disconnects - node2.Labels = map[string]string{} - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - 
gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) - gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeFalse()) - - // Explicitly call check reachibility so we need not to wait for slow periodic timer - checkEgressNodesReachabilityIterate(fakeOvn.controller) - - gomega.Expect(hcc1.IsConnected()).To(gomega.BeTrue()) - gomega.Expect(hcc2.IsConnected()).To(gomega.BeFalse()) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - }) - - ginkgo.Context("WatchEgressNodes running with WatchEgressIP", func() { - - ginkgo.It("should treat un-assigned EgressIPs when it is tagged", func() { - app.Action = func(ctx *cli.Context) error { - - egressIP := "192.168.126.101" - nodeIPv4 := "192.168.126.51/24" - nodeIPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" - - node := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", nodeIPv4, nodeIPv6), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, - } - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{}, - }, - } - - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node.Name, - UUID: ovntypes.GWRouterPrefix + node.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node.Name, - }, - }, - }, - }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, - }, - &v1.NodeList{ - Items: []v1.Node{node}, - }) - - err := fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressNodes() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) expectedDatabaseState := []libovsdbtest.TestData{ @@ -5125,46 +5710,43 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node.Name, - UUID: ovntypes.GWRouterPrefix + node.Name + "-UUID", + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", }, &nbdb.LogicalSwitchPort{ - UUID: 
types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name, + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, Type: "router", Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node.Name, + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", }, }, } gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) - gomega.Eventually(isEgressAssignableNode(node.Name)).Should(gomega.BeFalse()) gomega.Eventually(eIP.Status.Items).Should(gomega.HaveLen(0)) - node.Labels = map[string]string{ + node1.Labels = map[string]string{ "k8s.ovn.org/egress-assignable": "", } - _, ipv4Sub, err := net.ParseCIDR(nodeIPv4) - _, ipv6Sub, err := net.ParseCIDR(nodeIPv6) - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node, metav1.UpdateOptions{}) + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - gomega.Eventually(isEgressAssignableNode(node.Name)).Should(gomega.BeTrue()) - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node.Name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) - - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveLen(1)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache[node.Name].egressIPConfig.V4.Net).To(gomega.Equal(ipv4Sub)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache[node.Name].egressIPConfig.V6.Net).To(gomega.Equal(ipv6Sub)) - - gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) expectedDatabaseState = []libovsdbtest.TestData{ &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, @@ -5184,137 +5766,54 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node.Name, - UUID: ovntypes.GWRouterPrefix + node.Name + "-UUID", + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", }, &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name, + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, Type: "router", Options: 
map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node.Name, + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, "nat-addresses": "router", "exclude-lb-vips-from-garp": "true", }, }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should result in error and event if specified egress IP is a cluster node IP", func() { - app.Action = func(ctx *cli.Context) error { - - egressIP := "192.168.126.51" - node1IPv4 := "192.168.128.202/24" - node1IPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" - node2IPv4 := "192.168.126.51/24" - - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, node1IPv6), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, - } - node2 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node2Name, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", }, }, } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{}, - }, - } + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(0)) + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(1)) - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix 
+ types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, - }, - }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, - }, - &v1.NodeList{ - Items: []v1.Node{node1, node2}, - }) + node2.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } - err := fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressNodes() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - expectedDatabaseState := []libovsdbtest.TestData{ + fakeOvn.patchEgressIPObj(node2Name, egressIP) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node2.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) + expectedDatabaseState = []libovsdbtest.TestData{ &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", @@ -5363,12 +5862,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { } gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) - - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(0)) - gomega.Eventually(fakeOvn.fakeRecorder.Events).Should(gomega.HaveLen(3)) return nil } @@ -5376,51 +5869,24 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - ginkgo.It("should re-assigned EgressIPs when more nodes get tagged if the first assignment attempt wasn't fully successful", func() { + ginkgo.It("should try re-assigning EgressIP until all defined egress IPs are assigned", func() { app.Action = func(ctx *cli.Context) error { - egressIP1 := "192.168.126.25" - egressIP2 := "192.168.126.30" - node1IPv4 := "192.168.126.51/24" - node2IPv4 := "192.168.126.101/24" + egressIP1 := "192.168.126.101" + egressIP2 := "192.168.126.102" + node1IPv4 := "192.168.126.12/24" + node2IPv4 := "192.168.126.51/24" - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), } - node2 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: 
- Annotations: map[string]string{
- "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node2IPv4),
- "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet),
- },
- },
- Status: v1.NodeStatus{
- Conditions: []v1.NodeCondition{
- {
- Type: v1.NodeReady,
- Status: v1.ConditionTrue,
- },
- },
- },
+ node1 := getNodeObj(node1Name, annotations, map[string]string{})
+ annotations = map[string]string{
+ "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""),
+ "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet),
 }
+ node2 := getNodeObj(node2Name, annotations, map[string]string{})
 eIP := egressipv1.EgressIP{
 ObjectMeta: newEgressIPMeta(egressIPName),
@@ -5522,27 +5988,105 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() {
 Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name,
 Type: "router",
 Options: map[string]string{
- "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name,
+ "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name,
+ "nat-addresses": "router",
+ "exclude-lb-vips-from-garp": "true",
+ },
+ },
+ }
+ gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState))
+ gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(0))
+
+ node1.Labels = map[string]string{
+ "k8s.ovn.org/egress-assignable": "",
+ }
+
+ _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{})
+ gomega.Expect(err).NotTo(gomega.HaveOccurred())
+ fakeOvn.patchEgressIPObj(node1Name, egressIP1)
+ expectedDatabaseState = []libovsdbtest.TestData{
+ &nbdb.LogicalRouterPolicy{
+ Priority: types.DefaultNoRereoutePriority,
+ Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14",
+ Action: nbdb.LogicalRouterPolicyActionAllow,
+ UUID: "no-reroute-UUID",
+ },
+ &nbdb.LogicalRouterPolicy{
+ Priority: types.DefaultNoRereoutePriority,
+ Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet),
+ Action: nbdb.LogicalRouterPolicyActionAllow,
+ UUID: "no-reroute-service-UUID",
+ },
+ &nbdb.LogicalRouter{
+ Name: ovntypes.OVNClusterRouter,
+ UUID: ovntypes.OVNClusterRouter + "-UUID",
+ Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"},
+ },
+ &nbdb.LogicalRouter{
+ Name: ovntypes.GWRouterPrefix + node1.Name,
+ UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID",
+ },
+ &nbdb.LogicalRouter{
+ Name: ovntypes.GWRouterPrefix + node2.Name,
+ UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID",
+ },
+ &nbdb.LogicalSwitchPort{
+ UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID",
+ Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name,
+ Type: "router",
+ Options: map[string]string{
+ "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name,
+ "nat-addresses": "router",
+ "exclude-lb-vips-from-garp": "true",
+ },
+ },
+ &nbdb.LogicalSwitchPort{
+ UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID",
+ Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name,
+ Type: "router",
+ Options: map[string]string{
+ "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name,
+ "nat-addresses": "router",
+ "exclude-lb-vips-from-garp": "true",
+ },
+ },
+ }
 gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState))
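+ // With only node1 tagged as egress-assignable, just one of the two egress IPs can be assigned; the other stays pending.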
- gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2))
 gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1))
+ _, nodes := getEgressIPStatus(egressIPName)
+ gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name))
 gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(1))
- recordedEvent := <-fakeOvn.fakeRecorder.Events
- gomega.Expect(recordedEvent).To(gomega.ContainSubstring("Not all egress IPs for EgressIP: %s could be assigned, please tag more nodes", eIP.Name))
-
 node2.Labels = map[string]string{
 "k8s.ovn.org/egress-assignable": "",
 }
+
+ _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{})
 gomega.Expect(err).NotTo(gomega.HaveOccurred())
+ _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{})
+ gomega.Expect(err).NotTo(gomega.HaveOccurred())
+ // NOTE: The cluster manager is the component that patches the egressIP object.
+ // For the sake of unit testing the egressip zone controller, we patch the egressIP object manually here.
+ // Tests in the cluster-manager package cover the patch logic.
+ status := []egressipv1.EgressIPStatusItem{
+ {
+ Node: node1Name,
+ EgressIP: egressIP1,
+ },
+ {
+ Node: node2Name,
+ EgressIP: egressIP2,
+ },
+ }
+ err = fakeOvn.controller.patchReplaceEgressIPStatus(egressIPName, status)
+ gomega.Expect(err).NotTo(gomega.HaveOccurred())
+ gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(2))
+ gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0))
+
 expectedDatabaseState = []libovsdbtest.TestData{
 &nbdb.LogicalRouterPolicy{
 Priority: types.DefaultNoRereoutePriority,
@@ -5598,13 +6142,14 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() {
 gomega.Expect(err).NotTo(gomega.HaveOccurred())
 })
- ginkgo.It("should remove stale EgressIP setup when node label is removed while ovnkube-master is not running and assign to newly labelled node", func() {
+ ginkgo.It("ensure egress ip entries are not created when pod is already moved into completed state", func() {
 app.Action = func(ctx *cli.Context) error {
-
- egressIP1 := "192.168.126.25"
- node1IPv4 := "192.168.126.51/24"
+ config.Gateway.DisableSNATMultipleGWs = true
+ egressIP := "192.168.126.25"
+ node1IPv4 := "192.168.126.12/24"
 egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel)
+ egressPod.Status.Phase = v1.PodSucceeded
 egressNamespace := newNamespace(namespace)
 node1 := v1.Node{
 ObjectMeta: metav1.ObjectMeta{
 Name: node1Name,
 Annotations: map[string]string{
 "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4),
 "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet),
+ "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`,
+ "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec",
 },
- },
- Status: v1.NodeStatus{
- Conditions: []v1.NodeCondition{
- {
- Type: v1.NodeReady,
- Status: v1.ConditionTrue,
- },
- },
- },
- }
- node2 := v1.Node{
- ObjectMeta: metav1.ObjectMeta{
- Name: node2Name,
 Labels: map[string]string{
 "k8s.ovn.org/egress-assignable": "",
 },
- Annotations: map[string]string{
- "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4),
- "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet),
- },
 },
 Status: v1.NodeStatus{
 Conditions: []v1.NodeCondition{
@@ -5648,7 +6178,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() {
 eIP := egressipv1.EgressIP{
 ObjectMeta: newEgressIPMeta(egressIPName),
 Spec: egressipv1.EgressIPSpec{
- EgressIPs: []string{egressIP1},
+ EgressIPs: []string{egressIP},
 PodSelector: metav1.LabelSelector{
 MatchLabels: egressPodLabel,
 },
@@ -5659,62 +6189,48 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() {
 },
 },
 Status: egressipv1.EgressIPStatus{
- Items: []egressipv1.EgressIPStatusItem{
- {
- Node: node1.Name,
- EgressIP: egressIP1,
- },
- },
+ Items: []egressipv1.EgressIPStatusItem{},
 },
 }
+ node1Switch := &nbdb.LogicalSwitch{
+ UUID: node1.Name + "-UUID",
+ Name: node1.Name,
+ }
+ node1GR := &nbdb.LogicalRouter{
+ Name: ovntypes.GWRouterPrefix + node1.Name,
+ UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID",
+ }
+ node1LSP := &nbdb.LogicalSwitchPort{
+ UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID",
+ Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name,
+ Type: "router",
+ Options: map[string]string{
+ "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name,
+ },
+ }
 fakeOvn.startWithDBSetup(
 libovsdbtest.TestSetup{
 NBData: []libovsdbtest.TestData{
 &nbdb.LogicalRouter{
- Name: types.OVNClusterRouter,
- },
- &nbdb.LogicalRouter{
- Name: ovntypes.GWRouterPrefix + node1.Name,
- UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID",
- },
- &nbdb.LogicalRouter{
- Name: ovntypes.GWRouterPrefix + node2.Name,
- UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID",
+ Name: ovntypes.OVNClusterRouter,
+ UUID: ovntypes.OVNClusterRouter + "-UUID",
 },
+ node1GR,
+ node1LSP,
 &nbdb.LogicalRouterPort{
 UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID",
 Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name,
- Networks: []string{nodeLogicalRouterIfAddrV4},
- },
- &nbdb.LogicalRouterPort{
- UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID",
- Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name,
- Networks: []string{nodeLogicalRouterIfAddrV4},
- },
- &nbdb.LogicalSwitchPort{
- UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID",
- Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name,
- Type: "router",
- Options: map[string]string{
- "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name,
- },
- },
- &nbdb.LogicalSwitchPort{
- UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID",
- Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name,
- Type: "router",
- Options: map[string]string{
- "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name,
- },
+ Networks: []string{"100.64.0.2/29"},
 },
+ node1Switch,
 },
 },
 &egressipv1.EgressIPList{
 Items: []egressipv1.EgressIP{eIP},
 },
 &v1.NodeList{
- Items: []v1.Node{node1, node2},
+ Items: []v1.Node{node1},
 },
 &v1.NamespaceList{
 Items: []v1.Namespace{*egressNamespace},
@@ -5723,12 +6239,13 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() {
 Items: []v1.Pod{egressPod},
 },
 )
-
 i, n, _ := net.ParseCIDR(podV4IP + "/23")
 n.IP = i
 fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n})
- err := fakeOvn.controller.WatchEgressIPNamespaces()
+ err := fakeOvn.controller.WatchPods()
+ gomega.Expect(err).NotTo(gomega.HaveOccurred())
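+ // The pod watcher is started first so the already-completed pod is observed before the EgressIP handlers run.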
+ err = fakeOvn.controller.WatchEgressIPNamespaces()
 gomega.Expect(err).NotTo(gomega.HaveOccurred())
 err = fakeOvn.controller.WatchEgressIPPods()
 gomega.Expect(err).NotTo(gomega.HaveOccurred())
@@ -5737,90 +6254,52 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() {
 err = fakeOvn.controller.WatchEgressIP()
 gomega.Expect(err).NotTo(gomega.HaveOccurred())
+ fakeOvn.patchEgressIPObj(node1Name, egressIP)
+
+ egressPodPortInfo, err := fakeOvn.controller.logicalPortCache.get(&egressPod, types.DefaultNetworkName)
+ gomega.Expect(err).NotTo(gomega.HaveOccurred())
+ ePod, err := fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Get(context.TODO(), egressPod.Name, metav1.GetOptions{})
+ gomega.Expect(err).NotTo(gomega.HaveOccurred())
+ egressPodIP, err := util.GetPodIPsOfNetwork(ePod, &util.DefaultNetInfo{})
+ gomega.Expect(err).NotTo(gomega.HaveOccurred())
+ egressNetPodIP, _, err := net.ParseCIDR(egressPodPortInfo.ips[0].String())
+ gomega.Expect(err).NotTo(gomega.HaveOccurred())
+ gomega.Expect(egressNetPodIP.String()).To(gomega.Equal(egressPodIP[0].String()))
+
+ gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1))
 gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0))
- expectedNatLogicalPort := "k8s-node2"
- expectedDatabaseState := []libovsdbtest.TestData{
+ egressIPs, nodes := getEgressIPStatus(egressIPName)
+ gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name))
+ gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP))
+
+ node1LSP.Options = map[string]string{
+ "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name,
+ "nat-addresses": "router",
+ "exclude-lb-vips-from-garp": "true",
+ }
+ expectedDatabaseStatewithPod := []libovsdbtest.TestData{
 &nbdb.LogicalRouterPolicy{
 Priority: types.DefaultNoRereoutePriority,
 Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14",
 Action: nbdb.LogicalRouterPolicyActionAllow,
- UUID: "default-no-reroute-UUID",
- },
- &nbdb.LogicalRouterPolicy{
+ UUID: "no-reroute-UUID",
+ }, &nbdb.LogicalRouterPolicy{
 Priority: types.DefaultNoRereoutePriority,
 Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet),
 Action: nbdb.LogicalRouterPolicyActionAllow,
 UUID: "no-reroute-service-UUID",
- },
- &nbdb.LogicalRouterPolicy{
- Priority: types.EgressIPReroutePriority,
- Match: fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP),
- Action: nbdb.LogicalRouterPolicyActionReroute,
- Nexthops: nodeLogicalRouterIPv4,
- ExternalIDs: map[string]string{
- "name": eIP.Name,
- },
- UUID: "reroute-UUID",
- },
- &nbdb.NAT{
- UUID: "egressip-nat-UUID",
- LogicalIP: podV4IP,
- ExternalIP: egressIP1,
- ExternalIDs: map[string]string{
- "name": egressIPName,
- },
- Type: nbdb.NATTypeSNAT,
- LogicalPort: &expectedNatLogicalPort,
- Options: map[string]string{
- "stateless": "false",
- },
- },
- &nbdb.LogicalRouter{
+ }, &nbdb.LogicalRouter{
 Name: ovntypes.OVNClusterRouter,
 UUID: ovntypes.OVNClusterRouter + "-UUID",
- Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"},
- },
- &nbdb.LogicalRouter{
- Name: ovntypes.GWRouterPrefix + node1.Name,
- UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID",
- },
- &nbdb.LogicalRouter{
- Name: ovntypes.GWRouterPrefix + node2.Name,
- UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID",
- Nat: []string{"egressip-nat-UUID"},
- },
- &nbdb.LogicalRouterPort{
- UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID",
- Name: 
ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, + }, node1GR, node1LSP, &nbdb.LogicalRouterPort{ UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - } + Networks: []string{"100.64.0.2/29"}, + }, node1Switch} - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStatewithPod)) return nil } @@ -5828,23 +6307,26 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - ginkgo.It("should remove stale EgressIP setup when pod is deleted while ovnkube-master is not running", func() { + ginkgo.It("ensure external gw pod snat entry is not created back when pod is moved into completed state", func() { app.Action = func(ctx *cli.Context) error { + config.Gateway.DisableSNATMultipleGWs = true + egressIP := "192.168.126.25" + node1IPv4 := "192.168.126.12/24" - egressIP1 := "192.168.126.25" - node1IPv4 := "192.168.126.51/24" - + egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) egressNamespace := newNamespace(namespace) node1 := v1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: node1Name, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, Annotations: map[string]string{ "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, + "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", + }, + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", }, }, Status: v1.NodeStatus{ @@ -5860,7 +6342,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { eIP := egressipv1.EgressIP{ ObjectMeta: newEgressIPMeta(egressIPName), Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP1}, + EgressIPs: []string{egressIP}, PodSelector: metav1.LabelSelector{ MatchLabels: egressPodLabel, }, @@ -5871,219 +6353,32 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, }, Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{ - { - Node: node1.Name, - EgressIP: egressIP1, - }, - }, + Items: []egressipv1.EgressIPStatusItem{}, }, } - expectedNatLogicalPort := "k8s-node1" + node1Switch := &nbdb.LogicalSwitch{ + UUID: node1.Name + "-UUID", + 
Name: node1.Name, + } + node1GR := &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + } + node1LSP := &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + } fakeOvn.startWithDBSetup( libovsdbtest.TestSetup{ NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - UUID: "keep-me-UUID", - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Priority: types.DefaultNoRereoutePriority, - Action: nbdb.LogicalRouterPolicyActionAllow, - }, - &nbdb.LogicalRouterPolicy{ - UUID: "remove-me-UUID", - ExternalIDs: map[string]string{ - "name": eIP.Name, - }, - Match: "ip.src == 10.128.3.8", - Priority: types.EgressIPReroutePriority, - Action: nbdb.LogicalRouterPolicyActionReroute, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"remove-me-UUID", "keep-me-UUID"}, - }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Nat: []string{"egressip-nat-UUID"}, - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID", - LogicalIP: podV4IP, - ExternalIP: egressIP1, - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, - }, - }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, - }, - &v1.NodeList{ - Items: []v1.Node{node1}, - }, - &v1.NamespaceList{ - Items: []v1.Namespace{*egressNamespace}, - }, - ) - - err := fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressNodes() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) - expectedDatabaseState := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - UUID: "keep-me-UUID", - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Priority: types.DefaultNoRereoutePriority, - Action: nbdb.LogicalRouterPolicyActionAllow, - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - 
&nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"keep-me-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Nat: []string{"egressip-nat-UUID"}, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("ensure egress ip entries are not created when pod is already moved into completed state", func() { - app.Action = func(ctx *cli.Context) error { - config.Gateway.DisableSNATMultipleGWs = true - egressIP := "192.168.126.25" - node1IPv4 := "192.168.126.12/24" - - egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) - egressPod.Status.Phase = kapi.PodSucceeded - egressNamespace := newNamespace(namespace) - - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, - "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", - }, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, - } - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - PodSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, - }, - NamespaceSelector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - "name": egressNamespace.Name, - }, - }, - }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{}, - }, - } - - node1Switch := &nbdb.LogicalSwitch{ - UUID: node1.Name + "-UUID", - Name: node1.Name, - } - node1GR := &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - } - node1LSP := &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - } - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: 
ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", }, node1GR, node1LSP, @@ -6108,6 +6403,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Items: []v1.Pod{egressPod}, }, ) + i, n, _ := net.ParseCIDR(podV4IP + "/23") n.IP = i fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) @@ -6123,6 +6419,8 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.patchEgressIPObj(node1Name, egressIP) + egressPodPortInfo, err := fakeOvn.controller.logicalPortCache.get(&egressPod, types.DefaultNetworkName) gomega.Expect(err).NotTo(gomega.HaveOccurred()) ePod, err := fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Get(context.TODO(), egressPod.Name, metav1.GetOptions{}) @@ -6132,21 +6430,78 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { egressNetPodIP, _, err := net.ParseCIDR(egressPodPortInfo.ips[0].String()) gomega.Expect(err).NotTo(gomega.HaveOccurred()) gomega.Expect(egressNetPodIP.String()).To(gomega.Equal(egressPodIP[0].String())) + gomega.Expect(egressPodPortInfo.expires.IsZero()).To(gomega.BeTrue()) - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) egressIPs, nodes := getEgressIPStatus(egressIPName) gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + podEIPSNAT := &nbdb.NAT{ + UUID: "egressip-nat-UUID1", + LogicalIP: podV4IP, + ExternalIP: egressIP, + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: utilpointer.StringPtr("k8s-node1"), + Options: map[string]string{ + "stateless": "false", + }, + } + podReRoutePolicy := &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", egressPodIP[0].String()), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: nodeLogicalRouterIPv4, + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + UUID: "reroute-UUID1", + } + node1GR.Nat = []string{"egressip-nat-UUID1"} node1LSP.Options = map[string]string{ "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, "nat-addresses": "router", "exclude-lb-vips-from-garp": "true", } expectedDatabaseStatewithPod := []libovsdbtest.TestData{ + podEIPSNAT, &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", + }, &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, podReRoutePolicy, &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1"}, + }, node1GR, node1LSP, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + 
Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name,
+ Networks: []string{"100.64.0.2/29"},
+ }, node1Switch}
+
+ gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStatewithPod))
+
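+ // Move the pod to the Succeeded phase; the controller should remove its egress IP SNAT and reroute entries.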
"k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node2IPv4), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.51/24", "next-hop":"192.168.126.1"}}`, + "k8s.ovn.org/node-chassis-id": "89fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", + } + node2 := getNodeObj(node2Name, annotations, map[string]string{}) eIP := egressipv1.EgressIP{ ObjectMeta: newEgressIPMeta(egressIPName), Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, + EgressIPs: []string{egressIP1, egressIP2}, PodSelector: metav1.LabelSelector{ MatchLabels: egressPodLabel, }, @@ -6226,22 +6577,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, } - node1Switch := &nbdb.LogicalSwitch{ - UUID: node1.Name + "-UUID", - Name: node1.Name, - } - node1GR := &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - } - node1LSP := &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - } fakeOvn.startWithDBSetup( libovsdbtest.TestSetup{ NBData: []libovsdbtest.TestData{ @@ -6249,37 +6584,64 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", }, - node1GR, - node1LSP, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + }, &nbdb.LogicalRouterPort{ UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, Networks: []string{"100.64.0.2/29"}, }, - node1Switch, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{"100.64.0.3/29"}, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, + }, }, }, &egressipv1.EgressIPList{ Items: []egressipv1.EgressIP{eIP}, }, &v1.NodeList{ - Items: []v1.Node{node1}, + Items: []v1.Node{node1, node2}, }, &v1.NamespaceList{ Items: []v1.Namespace{*egressNamespace}, }, &v1.PodList{ - Items: []v1.Pod{egressPod}, + Items: []v1.Pod{egressPod1, egressPod2}, }, ) i, n, _ := net.ParseCIDR(podV4IP + "/23") n.IP = i - fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + 
fakeOvn.controller.logicalPortCache.add(&egressPod1, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + i, n, _ = net.ParseCIDR("10.128.0.16" + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod2, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - err := fakeOvn.controller.WatchPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -6288,462 +6650,334 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - egressPodPortInfo, err := fakeOvn.controller.logicalPortCache.get(&egressPod, types.DefaultNetworkName) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ePod, err := fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Get(context.TODO(), egressPod.Name, metav1.GetOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - egressPodIP, err := util.GetPodIPsOfNetwork(ePod, &util.DefaultNetInfo{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - egressNetPodIP, _, err := net.ParseCIDR(egressPodPortInfo.ips[0].String()) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Expect(egressNetPodIP.String()).To(gomega.Equal(egressPodIP[0].String())) - gomega.Expect(egressPodPortInfo.expires.IsZero()).To(gomega.BeTrue()) - - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) - - podEIPSNAT := &nbdb.NAT{ - UUID: "egressip-nat-UUID1", - LogicalIP: podV4IP, - ExternalIP: egressIP, - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: utilpointer.StringPtr("k8s-node1"), - Options: map[string]string{ - "stateless": "false", - }, - } - podReRoutePolicy := &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPodIP[0].String()), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv4, - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - UUID: "reroute-UUID1", - } - node1GR.Nat = []string{"egressip-nat-UUID1"} - node1LSP.Options = map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - } - expectedDatabaseStatewithPod := []libovsdbtest.TestData{ - podEIPSNAT, &nbdb.LogicalRouterPolicy{ + expectedDatabaseState := []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", Action: nbdb.LogicalRouterPolicyActionAllow, UUID: "no-reroute-UUID", - }, &nbdb.LogicalRouterPolicy{ + }, + &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), Action: nbdb.LogicalRouterPolicyActionAllow, UUID: "no-reroute-service-UUID", - }, 
podReRoutePolicy, &nbdb.LogicalRouter{ + }, + &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1"}, - }, node1GR, node1LSP, + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{"100.64.0.3/29"}, + }, &nbdb.LogicalRouterPort{ UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, Networks: []string{"100.64.0.2/29"}, - }, node1Switch} + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStatewithPod)) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(0)) + node1.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } - egressPod.Status.Phase = kapi.PodSucceeded - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Update(context.TODO(), &egressPod, metav1.UpdateOptions{}) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) - // Wait for pod to get moved into succeeded state. 
- gomega.Eventually(func() v1.PodPhase { - egressPod1, _ := fakeOvn.watcher.GetPod(egressPod.Namespace, egressPod.Name) - return egressPod1.Status.Phase - }, 5).Should(gomega.Equal(kapi.PodSucceeded)) + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - node1GR.Nat = []string{} - expectedDatabaseStatewitCompletedPod := []libovsdbtest.TestData{ + fakeOvn.patchEgressIPObj(node1Name, egressIP1) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(1)) + eips, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + + expectedNatLogicalPort1 := "k8s-node1" + expectedDatabaseState = []libovsdbtest.TestData{ &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", Action: nbdb.LogicalRouterPolicyActionAllow, UUID: "no-reroute-UUID", - }, &nbdb.LogicalRouterPolicy{ + }, + &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), Action: nbdb.LogicalRouterPolicyActionAllow, UUID: "no-reroute-service-UUID", - }, &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, - }, node1GR, node1LSP, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{"100.64.0.2/29"}, - }, node1Switch} - - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStatewitCompletedPod)) - - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should remove stale pod SNAT referring to wrong logical port after ovnkube-master is started", func() { - app.Action = func(ctx *cli.Context) error { - config.Gateway.DisableSNATMultipleGWs = true - egressIP := "192.168.126.25" - node1IPv4 := "192.168.126.12/24" - - egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) - egressNamespace := newNamespace(namespace) - - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, - "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", - }, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, + &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", egressPod1.Status.PodIP), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.0.2"}, + ExternalIDs: map[string]string{ + "name": eIP.Name, }, + UUID: "reroute-UUID1", }, - } - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: 
egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - PodSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, - }, - NamespaceSelector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - "name": egressNamespace.Name, - }, + &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", egressPod2.Status.PodIP), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.0.2"}, + ExternalIDs: map[string]string{ + "name": eIP.Name, }, + UUID: "reroute-UUID2", }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{}, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1", "reroute-UUID2"}, }, - } - - node1Switch := &nbdb.LogicalSwitch{ - UUID: node1.Name + "-UUID", - Name: node1.Name, - } - node1GR := &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - } - node1LSP := &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Nat: []string{"egressip-nat-UUID1", "egressip-nat-UUID2"}, }, - } - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - node1GR, - node1LSP, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{"100.64.0.2/29"}, - }, - node1Switch, - // This is unexpected snat entry where its logical port refers to an unavailable node - // and ensure this entry is removed as soon as ovnk master is up and running. 
- &nbdb.NAT{ - UUID: "egressip-nat-UUID2", - LogicalIP: podV4IP, - ExternalIP: egressIP, - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: utilpointer.StringPtr("k8s-node2"), - Options: map[string]string{ - "stateless": "false", - }, - }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + }, + &nbdb.NAT{ + UUID: "egressip-nat-UUID1", + LogicalIP: podV4IP, + ExternalIP: eips[0], + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort1, + Options: map[string]string{ + "stateless": "false", }, }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, + &nbdb.NAT{ + UUID: "egressip-nat-UUID2", + LogicalIP: "10.128.0.16", + ExternalIP: eips[0], + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort1, + Options: map[string]string{ + "stateless": "false", + }, }, - &v1.NodeList{ - Items: []v1.Node{node1}, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, }, - &v1.NamespaceList{ - Items: []v1.Namespace{*egressNamespace}, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, }, - &v1.PodList{ - Items: []v1.Pod{egressPod}, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{"100.64.0.3/29"}, }, - ) + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, + }, + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - i, n, _ := net.ParseCIDR(podV4IP + "/23") - n.IP = i - fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + node2.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } - err := fakeOvn.controller.WatchPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressNodes() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - egressPodPortInfo, err := fakeOvn.controller.logicalPortCache.get(&egressPod, types.DefaultNetworkName) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - 
ePod, err := fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Get(context.TODO(), egressPod.Name, metav1.GetOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - egressPodIP, err := util.GetPodIPsOfNetwork(ePod, &util.DefaultNetInfo{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - egressNetPodIP, _, err := net.ParseCIDR(egressPodPortInfo.ips[0].String()) + // NOTE: Cluster manager is the one who patches the egressIP object. + // For the sake of unit testing egressip zone controller we need to patch egressIP object manually + // There are tests in cluster-manager package covering the patch logic. + status := []egressipv1.EgressIPStatusItem{ + { + Node: node1Name, + EgressIP: egressIP1, + }, + { + Node: node2Name, + EgressIP: egressIP2, + }, + } + err = fakeOvn.controller.patchReplaceEgressIPStatus(egressIPName, status) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Expect(egressNetPodIP.String()).To(gomega.Equal(egressPodIP[0].String())) - gomega.Expect(egressPodPortInfo.expires.IsZero()).To(gomega.BeTrue()) - - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(2)) gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) - egressIPs, nodes := getEgressIPStatus(egressIPName) + + eips, nodes = getEgressIPStatus(egressIPName) gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + gomega.Expect(nodes[1]).To(gomega.Equal(node2.Name)) - podEIPSNAT := &nbdb.NAT{ - UUID: "egressip-nat-UUID1", - LogicalIP: podV4IP, - ExternalIP: egressIP, - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: utilpointer.StringPtr("k8s-node1"), - Options: map[string]string{ - "stateless": "false", - }, - } - podReRoutePolicy := &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPodIP[0].String()), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv4, - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - UUID: "reroute-UUID1", - } - node1GR.Nat = []string{"egressip-nat-UUID1"} - node1LSP.Options = map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - } - expectedDatabaseStatewithPod := []libovsdbtest.TestData{ - podEIPSNAT, &nbdb.LogicalRouterPolicy{ + expectedNatLogicalPort2 := "k8s-node2" + expectedDatabaseState = []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", Action: nbdb.LogicalRouterPolicyActionAllow, UUID: "no-reroute-UUID", - }, &nbdb.LogicalRouterPolicy{ + }, + &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), Action: nbdb.LogicalRouterPolicyActionAllow, UUID: "no-reroute-service-UUID", - }, podReRoutePolicy, &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1"}, - }, node1GR, node1LSP, - &nbdb.LogicalRouterPort{ - UUID: 
ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{"100.64.0.2/29"}, - }, node1Switch} - - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStatewithPod)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should only get assigned EgressIPs which matches their subnet when the node is tagged", func() { - app.Action = func(ctx *cli.Context) error { - - egressIP := "192.168.126.101" - node1IPv4 := "192.168.128.202/24" - node1IPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" - node2IPv4 := "192.168.126.51/24" - - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, node1IPv6), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", egressPod1.Status.PodIP), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.0.2", "100.64.0.3"}, + ExternalIDs: map[string]string{ + "name": eIP.Name, }, + UUID: "reroute-UUID1", }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, + &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", egressPod2.Status.PodIP), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.0.2", "100.64.0.3"}, + ExternalIDs: map[string]string{ + "name": eIP.Name, }, + UUID: "reroute-UUID2", }, - } - node2 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node2Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + &nbdb.NAT{ + UUID: "egressip-nat-UUID1", + LogicalIP: podV4IP, + ExternalIP: eips[0], + ExternalIDs: map[string]string{ + "name": egressIPName, }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort1, + Options: map[string]string{ + "stateless": "false", }, }, - } - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{}, - }, - } - - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - }, - &nbdb.LogicalSwitchPort{ - 
UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, + &nbdb.NAT{ + UUID: "egressip-nat-UUID2", + LogicalIP: "10.128.0.16", + ExternalIP: eips[0], + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort1, + Options: map[string]string{ + "stateless": "false", }, }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, - }, - &v1.NodeList{ - Items: []v1.Node{node1, node2}, - }) - - err := fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressNodes() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - _, ip1V4Sub, err := net.ParseCIDR(node1IPv4) - _, ip1V6Sub, err := net.ParseCIDR(node1IPv6) - _, ip2V4Sub, err := net.ParseCIDR(node2IPv4) - - expectedDatabaseState := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", + &nbdb.NAT{ + UUID: "egressip-nat-UUID3", + LogicalIP: podV4IP, + ExternalIP: eips[1], + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort2, + Options: map[string]string{ + "stateless": "false", + }, }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", + &nbdb.NAT{ + UUID: "egressip-nat-UUID4", + LogicalIP: "10.128.0.16", + ExternalIP: eips[1], + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort2, + Options: map[string]string{ + "stateless": "false", + }, }, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1", "reroute-UUID2"}, }, &nbdb.LogicalRouter{ Name: ovntypes.GWRouterPrefix + node1.Name, UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Nat: []string{"egressip-nat-UUID1", "egressip-nat-UUID2"}, }, &nbdb.LogicalRouter{ Name: ovntypes.GWRouterPrefix + node2.Name, UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Nat: []string{"egressip-nat-UUID3", "egressip-nat-UUID4"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{"100.64.0.3/29"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, }, &nbdb.LogicalSwitchPort{ UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + 
"UUID", Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, Type: "router", Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", }, }, &nbdb.LogicalSwitchPort{ @@ -6751,28 +6985,25 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, Type: "router", Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", }, }, } gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeFalse()) - gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeFalse()) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache[node1.Name].egressIPConfig.V4.Net).To(gomega.Equal(ip1V4Sub)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache[node1.Name].egressIPConfig.V6.Net).To(gomega.Equal(ip1V6Sub)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache[node2.Name].egressIPConfig.V4.Net).To(gomega.Equal(ip2V4Sub)) - gomega.Eventually(eIP.Status.Items).Should(gomega.HaveLen(0)) - - node1.Labels = map[string]string{ - "k8s.ovn.org/egress-assignable": "", - } + // remove label from node2 + node2.Labels = map[string]string{} - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeOvn.patchEgressIPObj(node1Name, egressIP1) + + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(1)) + expectedDatabaseState = []libovsdbtest.TestData{ &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, @@ -6786,2378 +7017,203 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Action: nbdb.LogicalRouterPolicyActionAllow, UUID: "no-reroute-service-UUID", }, + &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", egressPod1.Status.PodIP), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: nodeLogicalRouterIPv4, + ExternalIDs: map[string]string{ + "name": eIP.Name, + }, + UUID: "reroute-UUID1", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", egressPod2.Status.PodIP), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: nodeLogicalRouterIPv4, + ExternalIDs: map[string]string{ + "name": eIP.Name, + }, + UUID: "reroute-UUID2", + }, + &nbdb.NAT{ + UUID: "egressip-nat-UUID1", + LogicalIP: podV4IP, + ExternalIP: eips[0], + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort1, + Options: map[string]string{ + "stateless": "false", + }, + }, + &nbdb.NAT{ + 
UUID: "egressip-nat-UUID2", + LogicalIP: "10.128.0.16", + ExternalIP: eips[0], + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort1, + Options: map[string]string{ + "stateless": "false", + }, + }, + &nbdb.NAT{ + UUID: "egressip-nat-UUID3", + LogicalIP: "10.128.0.16", + ExternalIP: "192.168.126.51", // adds back SNAT towards nodeIP + Type: nbdb.NATTypeSNAT, + Options: map[string]string{ + "stateless": "false", + }, + }, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1", "reroute-UUID2"}, }, &nbdb.LogicalRouter{ Name: ovntypes.GWRouterPrefix + node1.Name, UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Nat: []string{"egressip-nat-UUID1", "egressip-nat-UUID2"}, }, &nbdb.LogicalRouter{ Name: ovntypes.GWRouterPrefix + node2.Name, UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Nat: []string{"egressip-nat-UUID3"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{"100.64.0.3/29"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, }, &nbdb.LogicalSwitchPort{ UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(0)) - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) - - gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(1)) - - node2.Labels = map[string]string{ - "k8s.ovn.org/egress-assignable": "", - } - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.Name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) - gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) - expectedDatabaseState = []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - 
Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should try re-assigning EgressIP until all defined egress IPs are assigned", func() { - app.Action = func(ctx *cli.Context) error { - - egressIP1 := "192.168.126.101" - egressIP2 := "192.168.126.102" - node1IPv4 := "192.168.126.12/24" - node2IPv4 := "192.168.126.51/24" - - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, - } - node2 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node2Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node2IPv4), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, - } - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP1, egressIP2}, - }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{}, - }, - } - - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: 
types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, - }, - }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, - }, - &v1.NodeList{ - Items: []v1.Node{node1, node2}, - }) - - err := fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressNodes() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - expectedDatabaseState := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(0)) - - node1.Labels = map[string]string{ - "k8s.ovn.org/egress-assignable": "", - } - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - expectedDatabaseState = []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - 
UUID: "no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - _, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) - - gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(1)) - - node2.Labels = map[string]string{ - "k8s.ovn.org/egress-assignable": "", - } - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(2)) - gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) - - expectedDatabaseState = []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - &nbdb.LogicalSwitchPort{ 
- UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should ensure SNATs towards egressIP and nodeIP are correctly configured during egressIP re-assignment", func() { - app.Action = func(ctx *cli.Context) error { - config.Gateway.DisableSNATMultipleGWs = true - - egressIP1 := "192.168.126.101" - egressIP2 := "192.168.126.102" - node1IPv4 := "192.168.126.12/24" - node2IPv4 := "192.168.126.51/24" - - egressPod1 := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) - egressPod2 := *newPodWithLabels(namespace, "egress-pod2", node2Name, "10.128.0.16", egressPodLabel) - egressNamespace := newNamespace(namespace) - - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, - "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, - } - node2 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node2Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node2IPv4), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.51/24", "next-hop":"192.168.126.1"}}`, - "k8s.ovn.org/node-chassis-id": "89fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, - } - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP1, egressIP2}, - PodSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, - }, - NamespaceSelector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - "name": egressNamespace.Name, - }, - }, - }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{}, - }, - } - - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + 
node1.Name, - Networks: []string{"100.64.0.2/29"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{"100.64.0.3/29"}, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, - }, - }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, - }, - &v1.NodeList{ - Items: []v1.Node{node1, node2}, - }, - &v1.NamespaceList{ - Items: []v1.Namespace{*egressNamespace}, - }, - &v1.PodList{ - Items: []v1.Pod{egressPod1, egressPod2}, - }, - ) - - i, n, _ := net.ParseCIDR(podV4IP + "/23") - n.IP = i - fakeOvn.controller.logicalPortCache.add(&egressPod1, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - i, n, _ = net.ParseCIDR("10.128.0.16" + "/23") - n.IP = i - fakeOvn.controller.logicalPortCache.add(&egressPod2, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - - err := fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressNodes() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - expectedDatabaseState := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{"100.64.0.3/29"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{"100.64.0.2/29"}, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix 
+ types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(0)) - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeFalse()) - gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeFalse()) - - node1.Labels = map[string]string{ - "k8s.ovn.org/egress-assignable": "", - } - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) - gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeFalse()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(1)) - eips, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) - - expectedNatLogicalPort1 := "k8s-node1" - expectedDatabaseState = []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPod1.Status.PodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: []string{"100.64.0.2"}, - ExternalIDs: map[string]string{ - "name": eIP.Name, - }, - UUID: "reroute-UUID1", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPod2.Status.PodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: []string{"100.64.0.2"}, - ExternalIDs: map[string]string{ - "name": eIP.Name, - }, - UUID: "reroute-UUID2", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1", "reroute-UUID2"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Nat: []string{"egressip-nat-UUID1", "egressip-nat-UUID2"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID1", - LogicalIP: podV4IP, - ExternalIP: eips[0], - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort1, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID2", - LogicalIP: 
"10.128.0.16", - ExternalIP: eips[0], - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort1, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{"100.64.0.3/29"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{"100.64.0.2/29"}, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - node2.Labels = map[string]string{ - "k8s.ovn.org/egress-assignable": "", - } - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) - gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeTrue()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(2)) - gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) - - eips, nodes = getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) - gomega.Expect(nodes[1]).To(gomega.Equal(node2.Name)) - - expectedNatLogicalPort2 := "k8s-node2" - expectedDatabaseState = []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPod1.Status.PodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: []string{"100.64.0.2", "100.64.0.3"}, - ExternalIDs: map[string]string{ - "name": eIP.Name, - }, - UUID: "reroute-UUID1", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPod2.Status.PodIP), - Action: 
nbdb.LogicalRouterPolicyActionReroute, - Nexthops: []string{"100.64.0.2", "100.64.0.3"}, - ExternalIDs: map[string]string{ - "name": eIP.Name, - }, - UUID: "reroute-UUID2", - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID1", - LogicalIP: podV4IP, - ExternalIP: eips[0], - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort1, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID2", - LogicalIP: "10.128.0.16", - ExternalIP: eips[0], - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort1, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID3", - LogicalIP: podV4IP, - ExternalIP: eips[1], - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort2, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID4", - LogicalIP: "10.128.0.16", - ExternalIP: eips[1], - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort2, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1", "reroute-UUID2"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Nat: []string{"egressip-nat-UUID1", "egressip-nat-UUID2"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Nat: []string{"egressip-nat-UUID3", "egressip-nat-UUID4"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{"100.64.0.3/29"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{"100.64.0.2/29"}, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - // remove label from node2 - node2.Labels = map[string]string{} - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - 
gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(1)) - - expectedDatabaseState = []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPod1.Status.PodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv4, - ExternalIDs: map[string]string{ - "name": eIP.Name, - }, - UUID: "reroute-UUID1", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPod2.Status.PodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv4, - ExternalIDs: map[string]string{ - "name": eIP.Name, - }, - UUID: "reroute-UUID2", - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID1", - LogicalIP: podV4IP, - ExternalIP: eips[0], - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort1, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID2", - LogicalIP: "10.128.0.16", - ExternalIP: eips[0], - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort1, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID3", - LogicalIP: "10.128.0.16", - ExternalIP: "192.168.126.51", // adds back SNAT towards nodeIP - Type: nbdb.NATTypeSNAT, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1", "reroute-UUID2"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Nat: []string{"egressip-nat-UUID1", "egressip-nat-UUID2"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Nat: []string{"egressip-nat-UUID3"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{"100.64.0.3/29"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{"100.64.0.2/29"}, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - 
&nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - // remove label from node1 - node1.Labels = map[string]string{} - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(0)) - gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(1)) // though 2 egressIPs to be re-assigned its only 1 egressIP object - - expectedDatabaseState = []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID1", - LogicalIP: podV4IP, - ExternalIP: "192.168.126.12", // adds back SNAT towards nodeIP - Type: nbdb.NATTypeSNAT, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID3", - LogicalIP: "10.128.0.16", - ExternalIP: "192.168.126.51", - Type: nbdb.NATTypeSNAT, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Nat: []string{"egressip-nat-UUID1"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Nat: []string{"egressip-nat-UUID3"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{"100.64.0.3/29"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{"100.64.0.2/29"}, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - 
return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should re-balance EgressIPs when their node is removed", func() { - app.Action = func(ctx *cli.Context) error { - - egressIP := "192.168.126.101" - node1IPv4 := "192.168.126.12/24" - node1IPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" - node2IPv4 := "192.168.126.51/24" - - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, node1IPv6), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), - }, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, - } - node2 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node2Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - }, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, - } - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{}, - }, - } - - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{nodeLogicalRouterIfAddrV4, nodeLogicalRouterIfAddrV6}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, - }, - }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, - }, - &v1.NodeList{ - Items: []v1.Node{node1}, - }) - - err := fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = 
fakeOvn.controller.WatchEgressIPPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressNodes() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - expectedDatabaseState := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{nodeLogicalRouterIfAddrV4, nodeLogicalRouterIfAddrV6}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node2, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - expectedDatabaseState = []libovsdbtest.TestData{ - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{nodeLogicalRouterIfAddrV4, nodeLogicalRouterIfAddrV6}, - }, - &nbdb.LogicalRouterPort{ - UUID: 
ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - egressIPs, nodes = getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) - - err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Delete(context.TODO(), node1.Name, *metav1.NewDeleteOptions(0)) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).ToNot(gomega.HaveKey(node1.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - - getNewNode := func() string { - _, nodes = getEgressIPStatus(egressIPName) - if len(nodes) > 0 { - return nodes[0] - } - return "" - } - - gomega.Eventually(getNewNode).Should(gomega.Equal(node2.Name)) - egressIPs, _ = getEgressIPStatus(egressIPName) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) - - expectedDatabaseState = []libovsdbtest.TestData{ - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + 
ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{nodeLogicalRouterIfAddrV4, nodeLogicalRouterIfAddrV6}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("egress node update should not mark the node as reachable if there was no label/readiness change", func() { - // When an egress node becomes reachable during a node update event and there is no changes to node labels/readiness - // unassigned egress IP should be eventually added by the periodic reachability check. 
- // Test steps: - // - disable periodic check from running in background, so it can be called directly from the test - // - assign egress IP to an available node - // - make the node unreachable and verify that the egress IP was unassigned - // - make the node reachable and update a node - // - verify that the egress IP was assigned by calling the periodic reachability check - app.Action = func(ctx *cli.Context) error { - egressIP := "192.168.126.101" - nodeIPv4 := "192.168.126.51/24" - node := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", nodeIPv4), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\"]}", v4NodeSubnet), - }, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, - } - eIP1 := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - }, - } - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node.Name, - UUID: ovntypes.GWRouterPrefix + node.Name + "-UUID", - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node.Name, - }, - }, - }, - }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP1}, - }, - &v1.NodeList{ - Items: []v1.Node{node}, - }, - ) - - // Virtually disable background reachability check by using a huge interval - fakeOvn.controller.eIPC.reachabilityCheckInterval = time.Hour - - err := fakeOvn.controller.WatchEgressNodes() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPStatusLen(eIP1.Name)).Should(gomega.Equal(1)) - egressIPs, _ := getEgressIPStatus(eIP1.Name) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) - - hcClient := fakeOvn.controller.eIPC.allocator.cache[node.Name].healthClient.(*fakeEgressIPHealthClient) - hcClient.FakeProbeFailure = true - // explicitly call check reachability, periodic checker is not active - checkEgressNodesReachabilityIterate(fakeOvn.controller) - gomega.Eventually(getEgressIPStatusLen(eIP1.Name)).Should(gomega.Equal(0)) - - hcClient.FakeProbeFailure = false - node.Annotations["test"] = "dummy" - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(hcClient.IsConnected()).Should(gomega.Equal(true)) - // the node should not be marked as reachable in the update handler as it is not getting added - gomega.Consistently(func() bool { return fakeOvn.controller.eIPC.allocator.cache[node.Name].isReachable }).Should(gomega.Equal(false)) - - // egress IP should get assigned on the next 
checkEgressNodesReachabilityIterate call - // explicitly call check reachability, periodic checker is not active - checkEgressNodesReachabilityIterate(fakeOvn.controller) - gomega.Eventually(getEgressIPStatusLen(eIP1.Name)).Should(gomega.Equal(1)) - - return nil - } - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - }) - - ginkgo.Context("Dual-stack assignment", func() { - - ginkgo.It("should be able to allocate non-conflicting IPv4 on node which can host it, even if it happens to be the node with more assignments", func() { - app.Action = func(ctx *cli.Context) error { - - fakeOvn.start() - egressIP := "192.168.126.99" - - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus1"}) - node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus1", "192.168.126.102": "bogus2"}) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - }, - } - assignedStatuses := fakeOvn.controller.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) - gomega.Expect(assignedStatuses).To(gomega.HaveLen(1)) - gomega.Expect(assignedStatuses[0].Node).To(gomega.Equal(node2.name)) - gomega.Expect(assignedStatuses[0].EgressIP).To(gomega.Equal(net.ParseIP(egressIP).String())) - - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - }) - - ginkgo.Context("IPv4 assignment", func() { - - ginkgo.It("Should not be able to assign egress IP defined in CIDR notation", func() { - app.Action = func(ctx *cli.Context) error { - - fakeOvn.start() - - egressIPs := []string{"192.168.126.99/32"} - - node1 := setupNode(node1Name, []string{"192.168.126.12/24"}, map[string]string{"192.168.126.102": "bogus1", "192.168.126.111": "bogus2"}) - node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus3"}) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: egressIPs, - }, - } - - validatedIPs, err := fakeOvn.controller.validateEgressIPSpec(eIP.Name, eIP.Spec.EgressIPs) - gomega.Expect(err).To(gomega.HaveOccurred()) - gomega.Expect(err.Error()).To(gomega.Equal(fmt.Sprintf("unable to parse provided EgressIP: %s, invalid", egressIPs[0]))) - gomega.Expect(validatedIPs).To(gomega.HaveLen(0)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - }) - - ginkgo.Context("IPv6 assignment", func() { - - ginkgo.It("should be able to allocate non-conflicting IP on node with lowest amount of allocations", func() { - app.Action = func(ctx *cli.Context) error { - - fakeOvn.start() - - egressIP := "0:0:0:0:0:feff:c0a8:8e0f" - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 
- - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - }, - } - assignedStatuses := fakeOvn.controller.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) - gomega.Expect(assignedStatuses).To(gomega.HaveLen(1)) - gomega.Expect(assignedStatuses[0].Node).To(gomega.Equal(node2.name)) - gomega.Expect(assignedStatuses[0].EgressIP).To(gomega.Equal(net.ParseIP(egressIP).String())) - - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should be able to allocate several EgressIPs and avoid the same node", func() { - app.Action = func(ctx *cli.Context) error { - - fakeOvn.start() - - egressIP1 := "0:0:0:0:0:feff:c0a8:8e0d" - egressIP2 := "0:0:0:0:0:feff:c0a8:8e0f" - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP1, egressIP2}, - }, - } - assignedStatuses := fakeOvn.controller.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) - gomega.Expect(assignedStatuses).To(gomega.HaveLen(2)) - gomega.Expect(assignedStatuses[0].Node).To(gomega.Equal(node2.name)) - gomega.Expect(assignedStatuses[0].EgressIP).To(gomega.Equal(net.ParseIP(egressIP1).String())) - gomega.Expect(assignedStatuses[1].Node).To(gomega.Equal(node1.name)) - gomega.Expect(assignedStatuses[1].EgressIP).To(gomega.Equal(net.ParseIP(egressIP2).String())) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should be able to allocate several EgressIPs and avoid the same node and leave one un-assigned without error", func() { - app.Action = func(ctx *cli.Context) error { - - fakeOvn.start() - - egressIP1 := "0:0:0:0:0:feff:c0a8:8e0d" - egressIP2 := "0:0:0:0:0:feff:c0a8:8e0e" - egressIP3 := "0:0:0:0:0:feff:c0a8:8e0f" - - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP1, egressIP2, egressIP3}, - }, - } - assignedStatuses := fakeOvn.controller.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) - gomega.Expect(assignedStatuses).To(gomega.HaveLen(2)) - gomega.Expect(assignedStatuses[0].Node).To(gomega.Equal(node2.name)) - gomega.Expect(assignedStatuses[0].EgressIP).To(gomega.Equal(net.ParseIP(egressIP1).String())) - gomega.Expect(assignedStatuses[1].Node).To(gomega.Equal(node1.name)) - gomega.Expect(assignedStatuses[1].EgressIP).To(gomega.Equal(net.ParseIP(egressIP2).String())) - - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should return the 
already allocated IP with the same node if it is allocated again", func() { - app.Action = func(ctx *cli.Context) error { - - fakeOvn.start() - - egressIP := "0:0:0:0:0:feff:c0a8:8e32" - - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{egressIP: egressIPName, "0:0:0:0:0:feff:c0a8:8e1e": "bogus1"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus2"}) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - egressIPs := []string{egressIP} - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: egressIPs, - }, - } - - assignedStatuses := fakeOvn.controller.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) - gomega.Expect(assignedStatuses).To(gomega.HaveLen(1)) - gomega.Expect(assignedStatuses[0].Node).To(gomega.Equal(node1Name)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should not be able to allocate node IP", func() { - app.Action = func(ctx *cli.Context) error { - - fakeOvn.start() - - egressIP := "0:0:0:0:0:feff:c0a8:8e0c" - - node1 := setupNode(node1Name, []string{egressIP + "/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - }, - } - assignedStatuses := fakeOvn.controller.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) - gomega.Expect(assignedStatuses).To(gomega.HaveLen(0)) - - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should not be able to allocate conflicting compressed IP", func() { - app.Action = func(ctx *cli.Context) error { - - fakeOvn.start() - - egressIP := "::feff:c0a8:8e32" - - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - egressIPs := []string{egressIP} - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: egressIPs, - }, - } - - assignedStatuses := fakeOvn.controller.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) - gomega.Expect(assignedStatuses).To(gomega.HaveLen(0)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should not be able to allocate IPv4 IP on nodes which can only host IPv6", func() { - app.Action = func(ctx *cli.Context) error { - - fakeOvn.start() - - egressIP := "192.168.126.16" - - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, 
[]string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - eIPs := []string{egressIP} - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: eIPs, - }, - } - - assignedStatuses := fakeOvn.controller.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) - gomega.Expect(assignedStatuses).To(gomega.HaveLen(0)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should be able to allocate non-conflicting compressed uppercase IP", func() { - app.Action = func(ctx *cli.Context) error { - - fakeOvn.start() - - egressIP := "::FEFF:C0A8:8D32" - - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - }, - } - assignedStatuses := fakeOvn.controller.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) - gomega.Expect(assignedStatuses).To(gomega.HaveLen(1)) - gomega.Expect(assignedStatuses[0].Node).To(gomega.Equal(node2.name)) - gomega.Expect(assignedStatuses[0].EgressIP).To(gomega.Equal(net.ParseIP(egressIP).String())) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should not be able to allocate conflicting compressed uppercase IP", func() { - app.Action = func(ctx *cli.Context) error { - - fakeOvn.start() - - egressIP := "::FEFF:C0A8:8E32" - - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - egressIPs := []string{egressIP} - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: egressIPs, - }, - } - - assignedStatuses := fakeOvn.controller.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) - gomega.Expect(assignedStatuses).To(gomega.HaveLen(0)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should not be able to allocate invalid IP", func() { - app.Action = func(ctx *cli.Context) error { - - fakeOvn.start() - - egressIPs := []string{"0:0:0:0:0:feff:c0a8:8e32:5"} - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: egressIPs, - }, - } - - assignedStatuses, err := fakeOvn.controller.validateEgressIPSpec(eIP.Name, eIP.Spec.EgressIPs) - gomega.Expect(err).To(gomega.HaveOccurred()) - gomega.Expect(err.Error()).To(gomega.Equal(fmt.Sprintf("unable to parse provided EgressIP: %s, invalid", egressIPs[0]))) - 
gomega.Expect(assignedStatuses).To(gomega.HaveLen(0)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - }) - - ginkgo.Context("WatchEgressIP", func() { - - ginkgo.It("should update status correctly for single-stack IPv4", func() { - app.Action = func(ctx *cli.Context) error { - fakeOvn.startWithDBSetup(clusterRouterDbSetup) - - egressIP := "192.168.126.10" - node1 := setupNode(node1Name, []string{"192.168.126.12/24"}, map[string]string{"192.168.126.102": "bogus1", "192.168.126.111": "bogus2"}) - node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus3"}) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - NamespaceSelector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - "name": "does-not-exist", - }, - }, - }, - } - - err := fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) - - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should update status correctly for single-stack IPv6", func() { - app.Action = func(ctx *cli.Context) error { - fakeOvn.startWithDBSetup(clusterRouterDbSetup) - - egressIP := "0:0:0:0:0:feff:c0a8:8e0d" - - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - }, - } - - err := fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(net.ParseIP(egressIP).String())) - - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should update status correctly for dual-stack", func() { - app.Action = func(ctx *cli.Context) error { - fakeOvn.startWithDBSetup(clusterRouterDbSetup) - - egressIPv4 := "192.168.126.101" - egressIPv6 := "0:0:0:0:0:feff:c0a8:8e0d" - - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus1"}) - node2 := setupNode(node2Name, 
[]string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus2", "192.168.126.102": "bogus3"}) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIPv4, egressIPv6}, - }, - } - - err := fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(2)) - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes).To(gomega.ConsistOf(node2.name, node1.name)) - gomega.Expect(egressIPs).To(gomega.ConsistOf(net.ParseIP(egressIPv6).String(), net.ParseIP(egressIPv4).String())) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - }) - - ginkgo.Context("syncEgressIP for dual-stack", func() { - - ginkgo.It("should not update valid assignments", func() { - app.Action = func(ctx *cli.Context) error { - - egressIPv4 := "192.168.126.101" - egressIPv6 := "0:0:0:0:0:feff:c0a8:8e0d" - - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{}) - node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.102": "bogus3"}) - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIPv4, egressIPv6}, - }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{ - { - EgressIP: egressIPv4, - Node: node2.name, - }, - { - EgressIP: net.ParseIP(egressIPv6).String(), - Node: node1.name, - }, - }, - }, - } - - fakeOvn.startWithDBSetup(clusterRouterDbSetup, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, - }, - ) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - err := fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(2)) - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes).To(gomega.ConsistOf(eIP.Status.Items[0].Node, eIP.Status.Items[1].Node)) - gomega.Expect(egressIPs).To(gomega.ConsistOf(eIP.Status.Items[0].EgressIP, eIP.Status.Items[1].EgressIP)) - - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - }) - - ginkgo.Context("syncEgressIP for IPv4", func() { - - ginkgo.It("should update invalid assignments on duplicated node", func() { - app.Action = func(ctx *cli.Context) error { - - egressIP1 := "192.168.126.101" - egressIP2 := "192.168.126.100" - - node1 := setupNode(node1Name, []string{"192.168.126.12/24"}, map[string]string{egressIP1: egressIPName, egressIP2: egressIPName}) - node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus3"}) - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP1, egressIP2}, - }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{ - { - EgressIP: egressIP1, - Node: node1.name, - }, - { - EgressIP: egressIP2, 
- Node: node1.name, - }, - }, - }, - } - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1Name, - UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2Name, - UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - }, - }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, - }, - ) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - err := fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(2)) - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes).To(gomega.ConsistOf(node1.name, node2.name)) - gomega.Expect(egressIPs).To(gomega.ConsistOf(eIP.Status.Items[0].EgressIP, eIP.Status.Items[1].EgressIP)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should update invalid assignments with incorrectly parsed IP", func() { - app.Action = func(ctx *cli.Context) error { - - egressIP1 := "192.168.126.101" - egressIPIncorrect := "192.168.126.1000" - - node1 := setupNode(node1Name, []string{"192.168.126.12/24"}, map[string]string{"192.168.126.102": "bogus1", "192.168.126.111": "bogus2"}) - node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus3"}) - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP1}, - }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{ - { - EgressIP: egressIPIncorrect, - Node: node1.name, - }, + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", }, }, - } - - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1Name, - UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2Name, - UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + 
ovntypes.GWRouterPrefix + node2Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", }, }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, - }, - ) + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 + // remove label from node1 + node1.Labels = map[string]string{} - err := fakeOvn.controller.WatchEgressIP() + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP1)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should update invalid assignments with unhostable IP on a node", func() { - app.Action = func(ctx *cli.Context) error { - - egressIP1 := "192.168.126.101" - egressIPIncorrect := "192.168.128.100" + // NOTE: Cluster manager is the one who patches the egressIP object. + // For the sake of unit testing egressip zone controller we need to patch egressIP object manually + // There are tests in cluster-manager package covering the patch logic. 
+ status = []egressipv1.EgressIPStatusItem{} + err = fakeOvn.controller.patchReplaceEgressIPStatus(egressIPName, status) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - node1 := setupNode(node1Name, []string{"192.168.126.12/24"}, map[string]string{"192.168.126.102": "bogus1", "192.168.126.111": "bogus2"}) - node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus3"}) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(0)) + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(1)) // though 2 egressIPs to be re-assigned its only 1 egressIP object - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP1}, + expectedDatabaseState = []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{ - { - EgressIP: egressIPIncorrect, - Node: node1.name, - }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, + &nbdb.NAT{ + UUID: "egressip-nat-UUID1", + LogicalIP: podV4IP, + ExternalIP: "192.168.126.12", // adds back SNAT towards nodeIP + Type: nbdb.NATTypeSNAT, + Options: map[string]string{ + "stateless": "false", }, }, - } - - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1Name, - UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2Name, - UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, + &nbdb.NAT{ + UUID: "egressip-nat-UUID3", + LogicalIP: "10.128.0.16", + ExternalIP: "192.168.126.51", + Type: nbdb.NATTypeSNAT, + Options: map[string]string{ + "stateless": "false", }, }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, }, - ) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - err := fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP1)) - - return nil 
- } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should not update valid assignment", func() { - app.Action = func(ctx *cli.Context) error { - - egressIP1 := "192.168.126.101" - - node1 := setupNode(node1Name, []string{"192.168.126.12/24"}, map[string]string{"192.168.126.111": "bogus2"}) - node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus3"}) - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP1}, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Nat: []string{"egressip-nat-UUID1"}, }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{ - { - EgressIP: egressIP1, - Node: node1.name, - }, - }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Nat: []string{"egressip-nat-UUID3"}, }, - } - - fakeOvn.startWithDBSetup(clusterRouterDbSetup, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{"100.64.0.3/29"}, }, - ) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - err := fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node1.name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP1)) - - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - }) - - ginkgo.Context("AddEgressIP for IPv4", func() { - - ginkgo.It("should not create two EgressIPs with same egress IP value", func() { - app.Action = func(ctx *cli.Context) error { - egressIP1 := "192.168.126.101" - - node1 := setupNode(node1Name, []string{"192.168.126.12/24"}, map[string]string{"192.168.126.102": "bogus1", "192.168.126.111": "bogus2"}) - node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus3"}) - - eIP1 := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta("egressip"), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP1}, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, }, - } - eIP2 := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta("egressip2"), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP1}, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: 
types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, }, } - - fakeOvn.startWithDBSetup(clusterRouterDbSetup) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - err := fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP1, metav1.CreateOptions{}) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPStatusLen(eIP1.Name)).Should(gomega.Equal(1)) - egressIPs, nodes := getEgressIPStatus(eIP1.Name) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP1)) - - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP2, metav1.CreateOptions{}) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPStatusLen(eIP2.Name)).Should(gomega.Equal(0)) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) return nil } @@ -9166,85 +7222,298 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - }) - - ginkgo.Context("UpdateEgressIP for IPv4", func() { - - ginkgo.It("should perform re-assingment of EgressIPs", func() { + ginkgo.It("should re-balance EgressIPs when their node is removed", func() { app.Action = func(ctx *cli.Context) error { egressIP := "192.168.126.101" - updateEgressIP := "192.168.126.10" + node1IPv4 := "192.168.126.12/24" + node1IPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" + node2IPv4 := "192.168.126.51/24" - node1 := setupNode(node1Name, []string{"192.168.126.41/24"}, map[string]string{"192.168.126.102": "bogus1", "192.168.126.111": "bogus2"}) - node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus3"}) + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, node1IPv6), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), + } + labels := map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + node1 := getNodeObj(node1Name, annotations, labels) + annotations = map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + } + node2 := getNodeObj(node2Name, annotations, labels) - eIP1 := egressipv1.EgressIP{ + eIP := egressipv1.EgressIP{ ObjectMeta: newEgressIPMeta(egressIPName), Spec: egressipv1.EgressIPSpec{ EgressIPs: []string{egressIP}, }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, } + fakeOvn.startWithDBSetup( libovsdbtest.TestSetup{ NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4, nodeLogicalRouterIfAddrV6}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: 
ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1Name, - UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2Name, - UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1Name, - Networks: []string{nodeLogicalRouterIfAddrV4, nodeLogicalRouterIfAddrV6}, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, }, }, }, - ) + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1}, + }) - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - err := fakeOvn.controller.WatchEgressIP() + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP1, metav1.CreateOptions{}) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) - + expectedDatabaseState := []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4, nodeLogicalRouterIfAddrV6}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: 
types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, + }, + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + fakeOvn.patchEgressIPObj(node1Name, egressIP) gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) - eIPToUpdate, err := fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP1.Name, metav1.GetOptions{}) + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node2, metav1.CreateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - eIPToUpdate.Spec.EgressIPs = []string{updateEgressIP} + expectedDatabaseState = []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4, nodeLogicalRouterIfAddrV6}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: 
ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Update(context.TODO(), eIPToUpdate, metav1.UpdateOptions{}) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes = getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) - getEgressIP := func() string { - egressIPs, _ = getEgressIPStatus(egressIPName) - if len(egressIPs) == 0 { - return "try again" + err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Delete(context.TODO(), node1.Name, *metav1.NewDeleteOptions(0)) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.patchEgressIPObj(node2Name, egressIP) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + + getNewNode := func() string { + _, nodes = getEgressIPStatus(egressIPName) + if len(nodes) > 0 { + return nodes[0] } - return egressIPs[0] + return "" } - gomega.Eventually(getEgressIP).Should(gomega.Equal(updateEgressIP)) - _, nodes = getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) + gomega.Eventually(getNewNode).Should(gomega.Equal(node2.Name)) + egressIPs, _ = getEgressIPStatus(egressIPName) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + expectedDatabaseState = []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4, nodeLogicalRouterIfAddrV6}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, + 
}, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) return nil } @@ -9253,3 +7522,63 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }) }) }) + +// TEST UTILITY FUNCTIONS; +// reduces redundant code + +func getEIPSNAT(podIP, egressIP, expectedNatLogicalPort string) *nbdb.NAT { + return &nbdb.NAT{ + UUID: "egressip-nat-UUID", + LogicalIP: podIP, + ExternalIP: egressIP, + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort, + Options: map[string]string{ + "stateless": "false", + }, + } +} + +func getReRoutePolicy(podIP, ipFamily string, nodeLogicalRouterIPv4 []string) *nbdb.LogicalRouterPolicy { + return &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip%s.src == %s", ipFamily, podIP), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: nodeLogicalRouterIPv4, + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + UUID: "reroute-UUID", + } +} + +func getReRouteStaticRoute(podIP, nextHop string) *nbdb.LogicalRouterStaticRoute { + return &nbdb.LogicalRouterStaticRoute{ + ExternalIDs: map[string]string{"name": egressIPName}, + Nexthop: nextHop, + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + IPPrefix: podIP, + UUID: "reroute-static-route-UUID", + } +} + +func getNodeObj(nodeName string, annotations, labels map[string]string) v1.Node { + return v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: nodeName, + Annotations: annotations, + Labels: labels, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } +} diff --git a/go-controller/pkg/ovn/ovn.go b/go-controller/pkg/ovn/ovn.go index d10ca81cc2..bd3ec1410a 100644 --- a/go-controller/pkg/ovn/ovn.go +++ b/go-controller/pkg/ovn/ovn.go @@ -34,10 +34,7 @@ import ( "k8s.io/klog/v2" ) -const ( - egressFirewallDNSDefaultDuration = 30 * time.Minute - egressIPReachabilityCheckInterval = 5 * time.Second -) +const egressFirewallDNSDefaultDuration = 30 * time.Minute // ACL logging severity levels type ACLLoggingLevels struct { @@ -301,13 +298,6 @@ func (oc *DefaultNetworkController) WatchEgressFwNodes() error { return err } -// WatchCloudPrivateIPConfig starts the watching of cloudprivateipconfigs -// resource and calls back the appropriate handler logic. 
-func (oc *DefaultNetworkController) WatchCloudPrivateIPConfig() error { - _, err := oc.retryCloudPrivateIPConfig.WatchResource() - return err -} - // WatchEgressIP starts the watching of egressip resource and calls back the // appropriate handler logic. It also initiates the other dedicated resource // handlers for egress IP setup: namespaces, pods. @@ -494,10 +484,10 @@ func (oc *DefaultNetworkController) InitEgressServiceController() (*egresssvc.Co } if hcPort == 0 { - return isReachableLegacy(nodeName, mgmtIPs, timeout) + return egresssvc.IsReachableLegacy(nodeName, mgmtIPs, timeout) } - return isReachableViaGRPC(mgmtIPs, healthClient, hcPort, timeout) + return egresssvc.IsReachableViaGRPC(mgmtIPs, healthClient, hcPort, timeout) } return egresssvc.NewController(DefaultNetworkControllerName, oc.client, oc.nbClient, oc.addressSetFactory, diff --git a/go-controller/pkg/ovn/ovn_test.go b/go-controller/pkg/ovn/ovn_test.go index 53f5caacea..d6089023a3 100644 --- a/go-controller/pkg/ovn/ovn_test.go +++ b/go-controller/pkg/ovn/ovn_test.go @@ -18,6 +18,7 @@ import ( egressfirewall "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1" egressfirewallfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1/apis/clientset/versioned/fake" egressip "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1" + egressipv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1" egressipfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1/apis/clientset/versioned/fake" egressqos "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressqos/v1" egressqosfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressqos/v1/apis/clientset/versioned/fake" @@ -225,7 +226,6 @@ func NewOvnController(ovnClient *util.OVNMasterClientset, wf *factory.WatchFacto Kube: kube.Kube{KClient: ovnClient.KubeClient}, EIPClient: ovnClient.EgressIPClient, EgressFirewallClient: ovnClient.EgressFirewallClient, - CloudNetworkClient: ovnClient.CloudNetworkClient, EgressServiceClient: ovnClient.EgressServiceClient, }, wf, @@ -331,7 +331,6 @@ func (o *FakeOVN) NewSecondaryNetworkController(netattachdef *nettypes.NetworkAt Kube: kube.Kube{KClient: o.fakeClient.KubeClient}, EIPClient: o.fakeClient.EgressIPClient, EgressFirewallClient: o.fakeClient.EgressFirewallClient, - CloudNetworkClient: o.fakeClient.CloudNetworkClient, }, o.watcher, o.fakeRecorder, @@ -380,3 +379,17 @@ func (o *FakeOVN) NewSecondaryNetworkController(netattachdef *nettypes.NetworkAt secondaryController.AddNAD(nadName) return nil } + +func (o *FakeOVN) patchEgressIPObj(nodeName, egressIP string) { + // NOTE: Cluster manager is the one who patches the egressIP object. + // For the sake of unit testing egressip zone controller we need to patch egressIP object manually + // There are tests in cluster-manager package covering the patch logic. + status := []egressipv1.EgressIPStatusItem{ + { + Node: nodeName, + EgressIP: egressIP, + }, + } + err := o.controller.patchReplaceEgressIPStatus(egressIPName, status) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) +} diff --git a/go-controller/pkg/syncmap/syncmap.go b/go-controller/pkg/syncmap/syncmap.go index 97f4c45845..bc59ef6cc0 100644 --- a/go-controller/pkg/syncmap/syncmap.go +++ b/go-controller/pkg/syncmap/syncmap.go @@ -140,6 +140,14 @@ func (c *SyncMap[T]) LoadOrStore(lockedKey string, newEntry T) (value T, loaded } } +// Store sets the value for a key. 
+// If key-value was already present, it will be over-written +func (c *SyncMap[T]) Store(lockedKey string, newEntry T) { + c.entriesMutex.Lock() + defer c.entriesMutex.Unlock() + c.entries[lockedKey] = newEntry +} + // Delete deletes object from the entries map func (c *SyncMap[T]) Delete(lockedKey string) { c.entriesMutex.Lock() diff --git a/go-controller/pkg/util/kube.go b/go-controller/pkg/util/kube.go index 4423c799d1..0aead0e5dc 100644 --- a/go-controller/pkg/util/kube.go +++ b/go-controller/pkg/util/kube.go @@ -55,7 +55,6 @@ type OVNMasterClientset struct { KubeClient kubernetes.Interface EgressIPClient egressipclientset.Interface EgressFirewallClient egressfirewallclientset.Interface - CloudNetworkClient ocpcloudnetworkclientset.Interface EgressQoSClient egressqosclientset.Interface MultiNetworkPolicyClient multinetworkpolicyclientset.Interface EgressServiceClient egressserviceclientset.Interface @@ -78,7 +77,6 @@ func (cs *OVNClientset) GetMasterClientset() *OVNMasterClientset { KubeClient: cs.KubeClient, EgressIPClient: cs.EgressIPClient, EgressFirewallClient: cs.EgressFirewallClient, - CloudNetworkClient: cs.CloudNetworkClient, EgressQoSClient: cs.EgressQoSClient, MultiNetworkPolicyClient: cs.MultiNetworkPolicyClient, EgressServiceClient: cs.EgressServiceClient, diff --git a/test/e2e/egressip.go b/test/e2e/egressip.go index ea6c426622..d1400ca5ae 100644 --- a/test/e2e/egressip.go +++ b/test/e2e/egressip.go @@ -1076,19 +1076,26 @@ spec: ginkgo.By("7. Check the OVN DB to ensure no SNATs are added for the standby egressIP") dbPods, err := framework.RunKubectl("ovn-kubernetes", "get", "pods", "-l", "name=ovnkube-db", "-o=jsonpath='{.items..metadata.name}'") + dbContainerName := "nb-ovsdb" + if isInterconnectEnabled() { + dbPods, err = framework.RunKubectl("ovn-kubernetes", "get", "pods", "-l", "name=ovnkube-node", "--field-selector", fmt.Sprintf("spec.nodeName=%s", egress1Node.name), "-o=jsonpath='{.items..metadata.name}'") + } if err != nil || len(dbPods) == 0 { framework.Failf("Error: Check the OVN DB to ensure no SNATs are added for the standby egressIP, err: %v", err) } dbPod := strings.Split(dbPods, " ")[0] dbPod = strings.TrimPrefix(dbPod, "'") dbPod = strings.TrimSuffix(dbPod, "'") + if len(dbPod) == 0 { + framework.Failf("Error: Check the OVN DB to ensure no SNATs are added for the standby egressIP, err: %v", err) + } logicalIP := fmt.Sprintf("logical_ip=%s", srcPodIP.String()) - snats, err := framework.RunKubectl("ovn-kubernetes", "exec", dbPod, "-c", "nb-ovsdb", "--", "ovn-nbctl", "--no-leader-only", "--columns=external_ip", "find", "nat", logicalIP) + snats, err := framework.RunKubectl("ovn-kubernetes", "exec", dbPod, "-c", dbContainerName, "--", "ovn-nbctl", "--no-leader-only", "--columns=external_ip", "find", "nat", logicalIP) if err != nil { framework.Failf("Error: Check the OVN DB to ensure no SNATs are added for the standby egressIP, err: %v", err) } if !strings.Contains(snats, statuses[0].EgressIP) || strings.Contains(snats, egressIP3.String()) { - framework.Failf("Step 7. Check that the second egressIP object is assigned to node2 (pod2Node/egress1Node), failed") + framework.Failf("Step 7. Check the OVN DB to ensure no SNATs are added for the standby egressIP, failed") } ginkgo.By("8. Check connectivity from pod to an external container and verify that the srcIP is the expected egressIP from object1") @@ -1147,7 +1154,7 @@ spec: framework.ExpectNoError(err, "Step 11. 
Check connectivity from pod to an external container and verify that the srcIP is the expected standby egressIP3 from object2, failed: %v", err) ginkgo.By("12. Check the OVN DB to ensure SNATs are added for only the standby egressIP") - snats, err = framework.RunKubectl("ovn-kubernetes", "exec", dbPod, "-c", "nb-ovsdb", "--", "ovn-nbctl", "--no-leader-only", "--columns=external_ip", "find", "nat", logicalIP) + snats, err = framework.RunKubectl("ovn-kubernetes", "exec", dbPod, "-c", dbContainerName, "--", "ovn-nbctl", "--no-leader-only", "--columns=external_ip", "find", "nat", logicalIP) if err != nil { framework.Failf("Error: Check the OVN DB to ensure SNATs are added for only the standby egressIP, err: %v", err) } @@ -1181,8 +1188,21 @@ spec: }) framework.ExpectNoError(err, "Step 14. Ensure egressIP1 from egressIP object1 and egressIP3 from object2 is correctly transferred to egress2Node, failed: %v", err) + if isInterconnectEnabled() { + dbPods, err = framework.RunKubectl("ovn-kubernetes", "get", "pods", "-l", "name=ovnkube-node", "--field-selector", fmt.Sprintf("spec.nodeName=%s", egress2Node.name), "-o=jsonpath='{.items..metadata.name}'") + } + if err != nil || len(dbPods) == 0 { + framework.Failf("Error: Check the OVN DB to ensure no SNATs are added for the standby egressIP, err: %v", err) + } + dbPod = strings.Split(dbPods, " ")[0] + dbPod = strings.TrimPrefix(dbPod, "'") + dbPod = strings.TrimSuffix(dbPod, "'") + if len(dbPod) == 0 { + framework.Failf("Error: Check the OVN DB to ensure no SNATs are added for the standby egressIP, err: %v", err) + } + ginkgo.By("15. Check the OVN DB to ensure SNATs are added for either egressIP1 or egressIP3") - snats, err = framework.RunKubectl("ovn-kubernetes", "exec", dbPod, "-c", "nb-ovsdb", "--", "ovn-nbctl", "--no-leader-only", "--columns=external_ip", "find", "nat", logicalIP) + snats, err = framework.RunKubectl("ovn-kubernetes", "exec", dbPod, "-c", dbContainerName, "--", "ovn-nbctl", "--no-leader-only", "--columns=external_ip", "find", "nat", logicalIP) if err != nil { framework.Failf("Error: Check the OVN DB to ensure SNATs are added for either egressIP1 or egressIP3, err: %v", err) } diff --git a/test/e2e/util.go b/test/e2e/util.go index ccb635aac4..14e0a1e4f2 100644 --- a/test/e2e/util.go +++ b/test/e2e/util.go @@ -1061,3 +1061,8 @@ func randStr(n int) string { } return string(b) } + +func isInterconnectEnabled() bool { + val, present := os.LookupEnv("OVN_INTERCONNECT_ENABLE") + return present && val == "true" +} From c915d41a9de52eb7f7753081f7cba34b522f672b Mon Sep 17 00:00:00 2001 From: jordigilh Date: Sat, 8 Apr 2023 10:50:45 -0400 Subject: [PATCH 67/90] Add Admin Policy Based External Route CRD and related generated code (informer,lister,api) Signed-off-by: jordigilh --- .../v1/apis/clientset/versioned/clientset.go | 96 ++++++++ .../v1/apis/clientset/versioned/doc.go | 19 ++ .../versioned/fake/clientset_generated.go | 84 +++++++ .../v1/apis/clientset/versioned/fake/doc.go | 19 ++ .../apis/clientset/versioned/fake/register.go | 55 +++++ .../v1/apis/clientset/versioned/scheme/doc.go | 19 ++ .../clientset/versioned/scheme/register.go | 55 +++++ .../v1/adminpolicybasedexternalroute.go | 183 +++++++++++++++ .../v1/adminpolicybasedroute_client.go | 88 +++++++ .../typed/adminpolicybasedroute/v1/doc.go | 19 ++ .../adminpolicybasedroute/v1/fake/doc.go | 19 ++ .../fake_adminpolicybasedexternalroute.go | 132 +++++++++++ .../fake/fake_adminpolicybasedroute_client.go | 39 ++++ .../v1/generated_expansion.go | 20 ++ 
.../adminpolicybasedroute/interface.go | 45 ++++ .../v1/adminpolicybasedexternalroute.go | 88 +++++++ .../adminpolicybasedroute/v1/interface.go | 44 ++++ .../informers/externalversions/factory.go | 179 ++++++++++++++ .../informers/externalversions/generic.go | 61 +++++ .../internalinterfaces/factory_interfaces.go | 39 ++++ .../v1/adminpolicybasedexternalroute.go | 67 ++++++ .../v1/expansion_generated.go | 22 ++ .../pkg/crd/adminpolicybasedroute/v1/doc.go | 4 + .../crd/adminpolicybasedroute/v1/register.go | 29 +++ .../pkg/crd/adminpolicybasedroute/v1/types.go | 166 +++++++++++++ .../v1/zz_generated.deepcopy.go | 220 ++++++++++++++++++ 26 files changed, 1811 insertions(+) create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/clientset.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/doc.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake/clientset_generated.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake/doc.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake/register.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/scheme/doc.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/scheme/register.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/adminpolicybasedexternalroute.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/adminpolicybasedroute_client.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/doc.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/fake/doc.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/fake/fake_adminpolicybasedexternalroute.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/fake/fake_adminpolicybasedroute_client.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/generated_expansion.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/adminpolicybasedroute/interface.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/adminpolicybasedroute/v1/adminpolicybasedexternalroute.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/adminpolicybasedroute/v1/interface.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/factory.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/generic.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/internalinterfaces/factory_interfaces.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/listers/adminpolicybasedroute/v1/adminpolicybasedexternalroute.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/listers/adminpolicybasedroute/v1/expansion_generated.go create mode 100644 
go-controller/pkg/crd/adminpolicybasedroute/v1/doc.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/register.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/types.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/zz_generated.deepcopy.go diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/clientset.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/clientset.go new file mode 100644 index 0000000000..c6c8453aed --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/clientset.go @@ -0,0 +1,96 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +package versioned + +import ( + "fmt" + + k8sv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1" + discovery "k8s.io/client-go/discovery" + rest "k8s.io/client-go/rest" + flowcontrol "k8s.io/client-go/util/flowcontrol" +) + +type Interface interface { + Discovery() discovery.DiscoveryInterface + K8sV1() k8sv1.K8sV1Interface +} + +// Clientset contains the clients for groups. Each group has exactly one +// version included in a Clientset. +type Clientset struct { + *discovery.DiscoveryClient + k8sV1 *k8sv1.K8sV1Client +} + +// K8sV1 retrieves the K8sV1Client +func (c *Clientset) K8sV1() k8sv1.K8sV1Interface { + return c.k8sV1 +} + +// Discovery retrieves the DiscoveryClient +func (c *Clientset) Discovery() discovery.DiscoveryInterface { + if c == nil { + return nil + } + return c.DiscoveryClient +} + +// NewForConfig creates a new Clientset for the given config. +// If config's RateLimiter is not set and QPS and Burst are acceptable, +// NewForConfig will generate a rate-limiter in configShallowCopy. +func NewForConfig(c *rest.Config) (*Clientset, error) { + configShallowCopy := *c + if configShallowCopy.RateLimiter == nil && configShallowCopy.QPS > 0 { + if configShallowCopy.Burst <= 0 { + return nil, fmt.Errorf("burst is required to be greater than 0 when RateLimiter is not set and QPS is set to greater than 0") + } + configShallowCopy.RateLimiter = flowcontrol.NewTokenBucketRateLimiter(configShallowCopy.QPS, configShallowCopy.Burst) + } + var cs Clientset + var err error + cs.k8sV1, err = k8sv1.NewForConfig(&configShallowCopy) + if err != nil { + return nil, err + } + + cs.DiscoveryClient, err = discovery.NewDiscoveryClientForConfig(&configShallowCopy) + if err != nil { + return nil, err + } + return &cs, nil +} + +// NewForConfigOrDie creates a new Clientset for the given config and +// panics if there is an error in the config. +func NewForConfigOrDie(c *rest.Config) *Clientset { + var cs Clientset + cs.k8sV1 = k8sv1.NewForConfigOrDie(c) + + cs.DiscoveryClient = discovery.NewDiscoveryClientForConfigOrDie(c) + return &cs +} + +// New creates a new Clientset for the given RESTClient. 
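Before the remaining constructors, a consumer-side sketch of NewForConfig: a minimal, self-contained program assuming an illustrative kubeconfig path. It is not part of the generated file.

    package main

    import (
        "context"
        "fmt"

        versioned "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned"
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
        "k8s.io/client-go/tools/clientcmd"
    )

    func main() {
        // Illustrative kubeconfig path; in-cluster config would work equally well.
        cfg, err := clientcmd.BuildConfigFromFlags("", "/etc/kubernetes/kubeconfig")
        if err != nil {
            panic(err)
        }
        // NewForConfig installs a token-bucket rate limiter when QPS is set
        // but no RateLimiter is configured (see above).
        cs, err := versioned.NewForConfig(cfg)
        if err != nil {
            panic(err)
        }
        routes, err := cs.K8sV1().AdminPolicyBasedExternalRoutes().List(context.TODO(), metav1.ListOptions{})
        if err != nil {
            panic(err)
        }
        fmt.Printf("found %d AdminPolicyBasedExternalRoute objects\n", len(routes.Items))
    }

New, which wraps an already-built rest.Interface instead of a rest.Config, follows.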
+func New(c rest.Interface) *Clientset { + var cs Clientset + cs.k8sV1 = k8sv1.New(c) + + cs.DiscoveryClient = discovery.NewDiscoveryClient(c) + return &cs +} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/doc.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/doc.go new file mode 100644 index 0000000000..518bc288b3 --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/doc.go @@ -0,0 +1,19 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +// This package has the automatically generated clientset. +package versioned diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake/clientset_generated.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake/clientset_generated.go new file mode 100644 index 0000000000..b7b1d957c8 --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake/clientset_generated.go @@ -0,0 +1,84 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +package fake + +import ( + clientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned" + k8sv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1" + fakek8sv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/fake" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/watch" + "k8s.io/client-go/discovery" + fakediscovery "k8s.io/client-go/discovery/fake" + "k8s.io/client-go/testing" +) + +// NewSimpleClientset returns a clientset that will respond with the provided objects. +// It's backed by a very simple object tracker that processes creates, updates and deletions as-is, +// without applying any validations and/or defaults. It shouldn't be considered a replacement +// for a real clientset and is mostly useful in simple unit tests. 
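A minimal unit-test sketch against the fake clientset defined below; the object name gw-policy is illustrative:

    import (
        "context"
        "testing"

        adminpolicybasedroutev1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1"
        "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake"
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    )

    func TestGetSeededRoute(t *testing.T) {
        // Seed the tracker with one pre-existing object.
        cs := fake.NewSimpleClientset(&adminpolicybasedroutev1.AdminPolicyBasedExternalRoute{
            ObjectMeta: metav1.ObjectMeta{Name: "gw-policy"},
        })
        got, err := cs.K8sV1().AdminPolicyBasedExternalRoutes().Get(context.TODO(), "gw-policy", metav1.GetOptions{})
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
        if got.Name != "gw-policy" {
            t.Fatalf("expected gw-policy, got %q", got.Name)
        }
    }

NewSimpleClientset itself follows.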
+func NewSimpleClientset(objects ...runtime.Object) *Clientset { + o := testing.NewObjectTracker(scheme, codecs.UniversalDecoder()) + for _, obj := range objects { + if err := o.Add(obj); err != nil { + panic(err) + } + } + + cs := &Clientset{tracker: o} + cs.discovery = &fakediscovery.FakeDiscovery{Fake: &cs.Fake} + cs.AddReactor("*", "*", testing.ObjectReaction(o)) + cs.AddWatchReactor("*", func(action testing.Action) (handled bool, ret watch.Interface, err error) { + gvr := action.GetResource() + ns := action.GetNamespace() + watch, err := o.Watch(gvr, ns) + if err != nil { + return false, nil, err + } + return true, watch, nil + }) + + return cs +} + +// Clientset implements clientset.Interface. Meant to be embedded into a +// struct to get a default implementation. This makes faking out just the method +// you want to test easier. +type Clientset struct { + testing.Fake + discovery *fakediscovery.FakeDiscovery + tracker testing.ObjectTracker +} + +func (c *Clientset) Discovery() discovery.DiscoveryInterface { + return c.discovery +} + +func (c *Clientset) Tracker() testing.ObjectTracker { + return c.tracker +} + +var ( + _ clientset.Interface = &Clientset{} + _ testing.FakeClient = &Clientset{} +) + +// K8sV1 retrieves the K8sV1Client +func (c *Clientset) K8sV1() k8sv1.K8sV1Interface { + return &fakek8sv1.FakeK8sV1{Fake: &c.Fake} +} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake/doc.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake/doc.go new file mode 100644 index 0000000000..19e0028ffb --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake/doc.go @@ -0,0 +1,19 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +// This package has the automatically generated fake clientset. +package fake diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake/register.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake/register.go new file mode 100644 index 0000000000..d98971e92b --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake/register.go @@ -0,0 +1,55 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. 
+ +package fake + +import ( + k8sv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + schema "k8s.io/apimachinery/pkg/runtime/schema" + serializer "k8s.io/apimachinery/pkg/runtime/serializer" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" +) + +var scheme = runtime.NewScheme() +var codecs = serializer.NewCodecFactory(scheme) + +var localSchemeBuilder = runtime.SchemeBuilder{ + k8sv1.AddToScheme, +} + +// AddToScheme adds all types of this clientset into the given scheme. This allows composition +// of clientsets, like in: +// +// import ( +// "k8s.io/client-go/kubernetes" +// clientsetscheme "k8s.io/client-go/kubernetes/scheme" +// aggregatorclientsetscheme "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/scheme" +// ) +// +// kclientset, _ := kubernetes.NewForConfig(c) +// _ = aggregatorclientsetscheme.AddToScheme(clientsetscheme.Scheme) +// +// After this, RawExtensions in Kubernetes types will serialize kube-aggregator types +// correctly. +var AddToScheme = localSchemeBuilder.AddToScheme + +func init() { + v1.AddToGroupVersion(scheme, schema.GroupVersion{Version: "v1"}) + utilruntime.Must(AddToScheme(scheme)) +} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/scheme/doc.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/scheme/doc.go new file mode 100644 index 0000000000..1aec4021fc --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/scheme/doc.go @@ -0,0 +1,19 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +// This package contains the scheme of the automatically generated clientset. +package scheme diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/scheme/register.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/scheme/register.go new file mode 100644 index 0000000000..8b6a438be5 --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/scheme/register.go @@ -0,0 +1,55 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. 
+ +package scheme + +import ( + k8sv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + schema "k8s.io/apimachinery/pkg/runtime/schema" + serializer "k8s.io/apimachinery/pkg/runtime/serializer" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" +) + +var Scheme = runtime.NewScheme() +var Codecs = serializer.NewCodecFactory(Scheme) +var ParameterCodec = runtime.NewParameterCodec(Scheme) +var localSchemeBuilder = runtime.SchemeBuilder{ + k8sv1.AddToScheme, +} + +// AddToScheme adds all types of this clientset into the given scheme. This allows composition +// of clientsets, like in: +// +// import ( +// "k8s.io/client-go/kubernetes" +// clientsetscheme "k8s.io/client-go/kubernetes/scheme" +// aggregatorclientsetscheme "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/scheme" +// ) +// +// kclientset, _ := kubernetes.NewForConfig(c) +// _ = aggregatorclientsetscheme.AddToScheme(clientsetscheme.Scheme) +// +// After this, RawExtensions in Kubernetes types will serialize kube-aggregator types +// correctly. +var AddToScheme = localSchemeBuilder.AddToScheme + +func init() { + v1.AddToGroupVersion(Scheme, schema.GroupVersion{Version: "v1"}) + utilruntime.Must(AddToScheme(Scheme)) +} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/adminpolicybasedexternalroute.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/adminpolicybasedexternalroute.go new file mode 100644 index 0000000000..ecfc25d1b3 --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/adminpolicybasedexternalroute.go @@ -0,0 +1,183 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +package v1 + +import ( + "context" + "time" + + v1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + scheme "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/scheme" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + types "k8s.io/apimachinery/pkg/types" + watch "k8s.io/apimachinery/pkg/watch" + rest "k8s.io/client-go/rest" +) + +// AdminPolicyBasedExternalRoutesGetter has a method to return a AdminPolicyBasedExternalRouteInterface. +// A group's client should implement this interface. +type AdminPolicyBasedExternalRoutesGetter interface { + AdminPolicyBasedExternalRoutes() AdminPolicyBasedExternalRouteInterface +} + +// AdminPolicyBasedExternalRouteInterface has methods to work with AdminPolicyBasedExternalRoute resources. 
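Before the full generated interface, a sketch of a typical mutation through it: a JSON merge patch that adds an annotation. The helper annotateRoute and the annotation key are hypothetical, not part of the generated client:

    import (
        "context"

        adminpolicybasedroutev1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1"
        apbv1client "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1"
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
        "k8s.io/apimachinery/pkg/types"
    )

    // annotateRoute is a hypothetical helper built on the generated interface.
    func annotateRoute(ctx context.Context, c apbv1client.AdminPolicyBasedExternalRouteInterface, name string) (*adminpolicybasedroutev1.AdminPolicyBasedExternalRoute, error) {
        payload := []byte(`{"metadata":{"annotations":{"example.k8s.ovn.org/touched":"true"}}}`)
        return c.Patch(ctx, name, types.MergePatchType, payload, metav1.PatchOptions{})
    }

The full generated interface and its implementation follow.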
+type AdminPolicyBasedExternalRouteInterface interface { + Create(ctx context.Context, adminPolicyBasedExternalRoute *v1.AdminPolicyBasedExternalRoute, opts metav1.CreateOptions) (*v1.AdminPolicyBasedExternalRoute, error) + Update(ctx context.Context, adminPolicyBasedExternalRoute *v1.AdminPolicyBasedExternalRoute, opts metav1.UpdateOptions) (*v1.AdminPolicyBasedExternalRoute, error) + UpdateStatus(ctx context.Context, adminPolicyBasedExternalRoute *v1.AdminPolicyBasedExternalRoute, opts metav1.UpdateOptions) (*v1.AdminPolicyBasedExternalRoute, error) + Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error + DeleteCollection(ctx context.Context, opts metav1.DeleteOptions, listOpts metav1.ListOptions) error + Get(ctx context.Context, name string, opts metav1.GetOptions) (*v1.AdminPolicyBasedExternalRoute, error) + List(ctx context.Context, opts metav1.ListOptions) (*v1.AdminPolicyBasedExternalRouteList, error) + Watch(ctx context.Context, opts metav1.ListOptions) (watch.Interface, error) + Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts metav1.PatchOptions, subresources ...string) (result *v1.AdminPolicyBasedExternalRoute, err error) + AdminPolicyBasedExternalRouteExpansion +} + +// adminPolicyBasedExternalRoutes implements AdminPolicyBasedExternalRouteInterface +type adminPolicyBasedExternalRoutes struct { + client rest.Interface +} + +// newAdminPolicyBasedExternalRoutes returns a AdminPolicyBasedExternalRoutes +func newAdminPolicyBasedExternalRoutes(c *K8sV1Client) *adminPolicyBasedExternalRoutes { + return &adminPolicyBasedExternalRoutes{ + client: c.RESTClient(), + } +} + +// Get takes name of the adminPolicyBasedExternalRoute, and returns the corresponding adminPolicyBasedExternalRoute object, and an error if there is any. +func (c *adminPolicyBasedExternalRoutes) Get(ctx context.Context, name string, options metav1.GetOptions) (result *v1.AdminPolicyBasedExternalRoute, err error) { + result = &v1.AdminPolicyBasedExternalRoute{} + err = c.client.Get(). + Resource("adminpolicybasedexternalroutes"). + Name(name). + VersionedParams(&options, scheme.ParameterCodec). + Do(ctx). + Into(result) + return +} + +// List takes label and field selectors, and returns the list of AdminPolicyBasedExternalRoutes that match those selectors. +func (c *adminPolicyBasedExternalRoutes) List(ctx context.Context, opts metav1.ListOptions) (result *v1.AdminPolicyBasedExternalRouteList, err error) { + var timeout time.Duration + if opts.TimeoutSeconds != nil { + timeout = time.Duration(*opts.TimeoutSeconds) * time.Second + } + result = &v1.AdminPolicyBasedExternalRouteList{} + err = c.client.Get(). + Resource("adminpolicybasedexternalroutes"). + VersionedParams(&opts, scheme.ParameterCodec). + Timeout(timeout). + Do(ctx). + Into(result) + return +} + +// Watch returns a watch.Interface that watches the requested adminPolicyBasedExternalRoutes. +func (c *adminPolicyBasedExternalRoutes) Watch(ctx context.Context, opts metav1.ListOptions) (watch.Interface, error) { + var timeout time.Duration + if opts.TimeoutSeconds != nil { + timeout = time.Duration(*opts.TimeoutSeconds) * time.Second + } + opts.Watch = true + return c.client.Get(). + Resource("adminpolicybasedexternalroutes"). + VersionedParams(&opts, scheme.ParameterCodec). + Timeout(timeout). + Watch(ctx) +} + +// Create takes the representation of a adminPolicyBasedExternalRoute and creates it. Returns the server's representation of the adminPolicyBasedExternalRoute, and an error, if there is any. 
+func (c *adminPolicyBasedExternalRoutes) Create(ctx context.Context, adminPolicyBasedExternalRoute *v1.AdminPolicyBasedExternalRoute, opts metav1.CreateOptions) (result *v1.AdminPolicyBasedExternalRoute, err error) { + result = &v1.AdminPolicyBasedExternalRoute{} + err = c.client.Post(). + Resource("adminpolicybasedexternalroutes"). + VersionedParams(&opts, scheme.ParameterCodec). + Body(adminPolicyBasedExternalRoute). + Do(ctx). + Into(result) + return +} + +// Update takes the representation of a adminPolicyBasedExternalRoute and updates it. Returns the server's representation of the adminPolicyBasedExternalRoute, and an error, if there is any. +func (c *adminPolicyBasedExternalRoutes) Update(ctx context.Context, adminPolicyBasedExternalRoute *v1.AdminPolicyBasedExternalRoute, opts metav1.UpdateOptions) (result *v1.AdminPolicyBasedExternalRoute, err error) { + result = &v1.AdminPolicyBasedExternalRoute{} + err = c.client.Put(). + Resource("adminpolicybasedexternalroutes"). + Name(adminPolicyBasedExternalRoute.Name). + VersionedParams(&opts, scheme.ParameterCodec). + Body(adminPolicyBasedExternalRoute). + Do(ctx). + Into(result) + return +} + +// UpdateStatus was generated because the type contains a Status member. +// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). +func (c *adminPolicyBasedExternalRoutes) UpdateStatus(ctx context.Context, adminPolicyBasedExternalRoute *v1.AdminPolicyBasedExternalRoute, opts metav1.UpdateOptions) (result *v1.AdminPolicyBasedExternalRoute, err error) { + result = &v1.AdminPolicyBasedExternalRoute{} + err = c.client.Put(). + Resource("adminpolicybasedexternalroutes"). + Name(adminPolicyBasedExternalRoute.Name). + SubResource("status"). + VersionedParams(&opts, scheme.ParameterCodec). + Body(adminPolicyBasedExternalRoute). + Do(ctx). + Into(result) + return +} + +// Delete takes name of the adminPolicyBasedExternalRoute and deletes it. Returns an error if one occurs. +func (c *adminPolicyBasedExternalRoutes) Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error { + return c.client.Delete(). + Resource("adminpolicybasedexternalroutes"). + Name(name). + Body(&opts). + Do(ctx). + Error() +} + +// DeleteCollection deletes a collection of objects. +func (c *adminPolicyBasedExternalRoutes) DeleteCollection(ctx context.Context, opts metav1.DeleteOptions, listOpts metav1.ListOptions) error { + var timeout time.Duration + if listOpts.TimeoutSeconds != nil { + timeout = time.Duration(*listOpts.TimeoutSeconds) * time.Second + } + return c.client.Delete(). + Resource("adminpolicybasedexternalroutes"). + VersionedParams(&listOpts, scheme.ParameterCodec). + Timeout(timeout). + Body(&opts). + Do(ctx). + Error() +} + +// Patch applies the patch and returns the patched adminPolicyBasedExternalRoute. +func (c *adminPolicyBasedExternalRoutes) Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts metav1.PatchOptions, subresources ...string) (result *v1.AdminPolicyBasedExternalRoute, err error) { + result = &v1.AdminPolicyBasedExternalRoute{} + err = c.client.Patch(pt). + Resource("adminpolicybasedexternalroutes"). + Name(name). + SubResource(subresources...). + VersionedParams(&opts, scheme.ParameterCodec). + Body(data). + Do(ctx). 
+ Into(result) + return +} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/adminpolicybasedroute_client.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/adminpolicybasedroute_client.go new file mode 100644 index 0000000000..aeb89508e9 --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/adminpolicybasedroute_client.go @@ -0,0 +1,88 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +package v1 + +import ( + v1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/scheme" + rest "k8s.io/client-go/rest" +) + +type K8sV1Interface interface { + RESTClient() rest.Interface + AdminPolicyBasedExternalRoutesGetter +} + +// K8sV1Client is used to interact with features provided by the k8s.ovn.org group. +type K8sV1Client struct { + restClient rest.Interface +} + +func (c *K8sV1Client) AdminPolicyBasedExternalRoutes() AdminPolicyBasedExternalRouteInterface { + return newAdminPolicyBasedExternalRoutes(c) +} + +// NewForConfig creates a new K8sV1Client for the given config. +func NewForConfig(c *rest.Config) (*K8sV1Client, error) { + config := *c + if err := setConfigDefaults(&config); err != nil { + return nil, err + } + client, err := rest.RESTClientFor(&config) + if err != nil { + return nil, err + } + return &K8sV1Client{client}, nil +} + +// NewForConfigOrDie creates a new K8sV1Client for the given config and +// panics if there is an error in the config. +func NewForConfigOrDie(c *rest.Config) *K8sV1Client { + client, err := NewForConfig(c) + if err != nil { + panic(err) + } + return client +} + +// New creates a new K8sV1Client for the given RESTClient. +func New(c rest.Interface) *K8sV1Client { + return &K8sV1Client{c} +} + +func setConfigDefaults(config *rest.Config) error { + gv := v1.SchemeGroupVersion + config.GroupVersion = &gv + config.APIPath = "/apis" + config.NegotiatedSerializer = scheme.Codecs.WithoutConversion() + + if config.UserAgent == "" { + config.UserAgent = rest.DefaultKubernetesUserAgent() + } + + return nil +} + +// RESTClient returns a RESTClient that is used to communicate +// with API server by this client implementation. 
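setConfigDefaults above is what anchors this client to its API group: it copies SchemeGroupVersion into the rest.Config, sets APIPath to /apis, and selects the shared codecs. The practical consequence, sketched with a hypothetical helper (apbv1client is an assumed import alias for this typed package):

    func getRoute(cfg *rest.Config, name string) (*adminpolicybasedroutev1.AdminPolicyBasedExternalRoute, error) {
        c, err := apbv1client.NewForConfig(cfg)
        if err != nil {
            return nil, err
        }
        // Resolves to GET /apis/k8s.ovn.org/v1/adminpolicybasedexternalroutes/<name>
        return c.AdminPolicyBasedExternalRoutes().Get(context.TODO(), name, metav1.GetOptions{})
    }
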
+func (c *K8sV1Client) RESTClient() rest.Interface { + if c == nil { + return nil + } + return c.restClient +} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/doc.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/doc.go new file mode 100644 index 0000000000..b22b05acdb --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/doc.go @@ -0,0 +1,19 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +// This package has the automatically generated typed clients. +package v1 diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/fake/doc.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/fake/doc.go new file mode 100644 index 0000000000..422564f2d5 --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/fake/doc.go @@ -0,0 +1,19 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +// Package fake has the automatically generated clients. +package fake diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/fake/fake_adminpolicybasedexternalroute.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/fake/fake_adminpolicybasedexternalroute.go new file mode 100644 index 0000000000..448b431522 --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/fake/fake_adminpolicybasedexternalroute.go @@ -0,0 +1,132 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. 
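One property of the fake typed client below that is easy to miss: every call is recorded as a testing.Action on the embedded Fake, so unit tests can assert on the requests made, not only on the resulting state. A minimal sketch, with an illustrative object name:

    import (
        "context"
        "testing"

        adminpolicybasedroutev1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1"
        "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake"
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    )

    func TestCreateIsRecorded(t *testing.T) {
        cs := fake.NewSimpleClientset()
        _, err := cs.K8sV1().AdminPolicyBasedExternalRoutes().Create(context.TODO(),
            &adminpolicybasedroutev1.AdminPolicyBasedExternalRoute{ObjectMeta: metav1.ObjectMeta{Name: "r1"}},
            metav1.CreateOptions{})
        if err != nil {
            t.Fatalf("create failed: %v", err)
        }
        // The embedded testing.Fake keeps an ordered action log.
        actions := cs.Actions()
        if len(actions) != 1 || actions[0].GetVerb() != "create" {
            t.Fatalf("unexpected action log: %v", actions)
        }
    }
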
+ +package fake + +import ( + "context" + + adminpolicybasedroutev1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + labels "k8s.io/apimachinery/pkg/labels" + schema "k8s.io/apimachinery/pkg/runtime/schema" + types "k8s.io/apimachinery/pkg/types" + watch "k8s.io/apimachinery/pkg/watch" + testing "k8s.io/client-go/testing" +) + +// FakeAdminPolicyBasedExternalRoutes implements AdminPolicyBasedExternalRouteInterface +type FakeAdminPolicyBasedExternalRoutes struct { + Fake *FakeK8sV1 +} + +var adminpolicybasedexternalroutesResource = schema.GroupVersionResource{Group: "k8s.ovn.org", Version: "v1", Resource: "adminpolicybasedexternalroutes"} + +var adminpolicybasedexternalroutesKind = schema.GroupVersionKind{Group: "k8s.ovn.org", Version: "v1", Kind: "AdminPolicyBasedExternalRoute"} + +// Get takes name of the adminPolicyBasedExternalRoute, and returns the corresponding adminPolicyBasedExternalRoute object, and an error if there is any. +func (c *FakeAdminPolicyBasedExternalRoutes) Get(ctx context.Context, name string, options v1.GetOptions) (result *adminpolicybasedroutev1.AdminPolicyBasedExternalRoute, err error) { + obj, err := c.Fake. + Invokes(testing.NewRootGetAction(adminpolicybasedexternalroutesResource, name), &adminpolicybasedroutev1.AdminPolicyBasedExternalRoute{}) + if obj == nil { + return nil, err + } + return obj.(*adminpolicybasedroutev1.AdminPolicyBasedExternalRoute), err +} + +// List takes label and field selectors, and returns the list of AdminPolicyBasedExternalRoutes that match those selectors. +func (c *FakeAdminPolicyBasedExternalRoutes) List(ctx context.Context, opts v1.ListOptions) (result *adminpolicybasedroutev1.AdminPolicyBasedExternalRouteList, err error) { + obj, err := c.Fake. + Invokes(testing.NewRootListAction(adminpolicybasedexternalroutesResource, adminpolicybasedexternalroutesKind, opts), &adminpolicybasedroutev1.AdminPolicyBasedExternalRouteList{}) + if obj == nil { + return nil, err + } + + label, _, _ := testing.ExtractFromListOptions(opts) + if label == nil { + label = labels.Everything() + } + list := &adminpolicybasedroutev1.AdminPolicyBasedExternalRouteList{ListMeta: obj.(*adminpolicybasedroutev1.AdminPolicyBasedExternalRouteList).ListMeta} + for _, item := range obj.(*adminpolicybasedroutev1.AdminPolicyBasedExternalRouteList).Items { + if label.Matches(labels.Set(item.Labels)) { + list.Items = append(list.Items, item) + } + } + return list, err +} + +// Watch returns a watch.Interface that watches the requested adminPolicyBasedExternalRoutes. +func (c *FakeAdminPolicyBasedExternalRoutes) Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error) { + return c.Fake. + InvokesWatch(testing.NewRootWatchAction(adminpolicybasedexternalroutesResource, opts)) +} + +// Create takes the representation of a adminPolicyBasedExternalRoute and creates it. Returns the server's representation of the adminPolicyBasedExternalRoute, and an error, if there is any. +func (c *FakeAdminPolicyBasedExternalRoutes) Create(ctx context.Context, adminPolicyBasedExternalRoute *adminpolicybasedroutev1.AdminPolicyBasedExternalRoute, opts v1.CreateOptions) (result *adminpolicybasedroutev1.AdminPolicyBasedExternalRoute, err error) { + obj, err := c.Fake. 
+ Invokes(testing.NewRootCreateAction(adminpolicybasedexternalroutesResource, adminPolicyBasedExternalRoute), &adminpolicybasedroutev1.AdminPolicyBasedExternalRoute{}) + if obj == nil { + return nil, err + } + return obj.(*adminpolicybasedroutev1.AdminPolicyBasedExternalRoute), err +} + +// Update takes the representation of a adminPolicyBasedExternalRoute and updates it. Returns the server's representation of the adminPolicyBasedExternalRoute, and an error, if there is any. +func (c *FakeAdminPolicyBasedExternalRoutes) Update(ctx context.Context, adminPolicyBasedExternalRoute *adminpolicybasedroutev1.AdminPolicyBasedExternalRoute, opts v1.UpdateOptions) (result *adminpolicybasedroutev1.AdminPolicyBasedExternalRoute, err error) { + obj, err := c.Fake. + Invokes(testing.NewRootUpdateAction(adminpolicybasedexternalroutesResource, adminPolicyBasedExternalRoute), &adminpolicybasedroutev1.AdminPolicyBasedExternalRoute{}) + if obj == nil { + return nil, err + } + return obj.(*adminpolicybasedroutev1.AdminPolicyBasedExternalRoute), err +} + +// UpdateStatus was generated because the type contains a Status member. +// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). +func (c *FakeAdminPolicyBasedExternalRoutes) UpdateStatus(ctx context.Context, adminPolicyBasedExternalRoute *adminpolicybasedroutev1.AdminPolicyBasedExternalRoute, opts v1.UpdateOptions) (*adminpolicybasedroutev1.AdminPolicyBasedExternalRoute, error) { + obj, err := c.Fake. + Invokes(testing.NewRootUpdateSubresourceAction(adminpolicybasedexternalroutesResource, "status", adminPolicyBasedExternalRoute), &adminpolicybasedroutev1.AdminPolicyBasedExternalRoute{}) + if obj == nil { + return nil, err + } + return obj.(*adminpolicybasedroutev1.AdminPolicyBasedExternalRoute), err +} + +// Delete takes name of the adminPolicyBasedExternalRoute and deletes it. Returns an error if one occurs. +func (c *FakeAdminPolicyBasedExternalRoutes) Delete(ctx context.Context, name string, opts v1.DeleteOptions) error { + _, err := c.Fake. + Invokes(testing.NewRootDeleteAction(adminpolicybasedexternalroutesResource, name), &adminpolicybasedroutev1.AdminPolicyBasedExternalRoute{}) + return err +} + +// DeleteCollection deletes a collection of objects. +func (c *FakeAdminPolicyBasedExternalRoutes) DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error { + action := testing.NewRootDeleteCollectionAction(adminpolicybasedexternalroutesResource, listOpts) + + _, err := c.Fake.Invokes(action, &adminpolicybasedroutev1.AdminPolicyBasedExternalRouteList{}) + return err +} + +// Patch applies the patch and returns the patched adminPolicyBasedExternalRoute. +func (c *FakeAdminPolicyBasedExternalRoutes) Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *adminpolicybasedroutev1.AdminPolicyBasedExternalRoute, err error) { + obj, err := c.Fake. 
+ Invokes(testing.NewRootPatchSubresourceAction(adminpolicybasedexternalroutesResource, name, pt, data, subresources...), &adminpolicybasedroutev1.AdminPolicyBasedExternalRoute{}) + if obj == nil { + return nil, err + } + return obj.(*adminpolicybasedroutev1.AdminPolicyBasedExternalRoute), err +} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/fake/fake_adminpolicybasedroute_client.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/fake/fake_adminpolicybasedroute_client.go new file mode 100644 index 0000000000..c6de68dffb --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/fake/fake_adminpolicybasedroute_client.go @@ -0,0 +1,39 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +package fake + +import ( + v1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1" + rest "k8s.io/client-go/rest" + testing "k8s.io/client-go/testing" +) + +type FakeK8sV1 struct { + *testing.Fake +} + +func (c *FakeK8sV1) AdminPolicyBasedExternalRoutes() v1.AdminPolicyBasedExternalRouteInterface { + return &FakeAdminPolicyBasedExternalRoutes{c} +} + +// RESTClient returns a RESTClient that is used to communicate +// with API server by this client implementation. +func (c *FakeK8sV1) RESTClient() rest.Interface { + var ret *rest.RESTClient + return ret +} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/generated_expansion.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/generated_expansion.go new file mode 100644 index 0000000000..e933837f77 --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/generated_expansion.go @@ -0,0 +1,20 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. 
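AdminPolicyBasedExternalRouteExpansion below is deliberately empty: it is the hook client-gen leaves so hand-written convenience methods can ride on the generated type. A sketch of the pattern, in a hypothetical non-generated file of the same package (ExistsByName is invented for illustration and would also be declared on the expansion interface):

    // expansion.go (hand-written, hypothetical)
    package v1

    import (
        "context"

        apierrors "k8s.io/apimachinery/pkg/api/errors"
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    )

    // ExistsByName reports whether a route with the given name exists,
    // treating NotFound as a non-error.
    func (c *adminPolicyBasedExternalRoutes) ExistsByName(ctx context.Context, name string) (bool, error) {
        _, err := c.Get(ctx, name, metav1.GetOptions{})
        if apierrors.IsNotFound(err) {
            return false, nil
        }
        return err == nil, err
    }
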
+ +package v1 + +type AdminPolicyBasedExternalRouteExpansion interface{} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/adminpolicybasedroute/interface.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/adminpolicybasedroute/interface.go new file mode 100644 index 0000000000..d455c82fbf --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/adminpolicybasedroute/interface.go @@ -0,0 +1,45 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by informer-gen. DO NOT EDIT. + +package adminpolicybasedroute + +import ( + v1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/adminpolicybasedroute/v1" + internalinterfaces "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/internalinterfaces" +) + +// Interface provides access to each of this group's versions. +type Interface interface { + // V1 provides access to shared informers for resources in V1. + V1() v1.Interface +} + +type group struct { + factory internalinterfaces.SharedInformerFactory + namespace string + tweakListOptions internalinterfaces.TweakListOptionsFunc +} + +// New returns a new Interface. +func New(f internalinterfaces.SharedInformerFactory, namespace string, tweakListOptions internalinterfaces.TweakListOptionsFunc) Interface { + return &group{factory: f, namespace: namespace, tweakListOptions: tweakListOptions} +} + +// V1 returns a new v1.Interface. +func (g *group) V1() v1.Interface { + return v1.New(g.factory, g.namespace, g.tweakListOptions) +} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/adminpolicybasedroute/v1/adminpolicybasedexternalroute.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/adminpolicybasedroute/v1/adminpolicybasedexternalroute.go new file mode 100644 index 0000000000..e8ff325a47 --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/adminpolicybasedroute/v1/adminpolicybasedexternalroute.go @@ -0,0 +1,88 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by informer-gen. DO NOT EDIT. 
+ +package v1 + +import ( + "context" + time "time" + + adminpolicybasedroutev1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + versioned "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned" + internalinterfaces "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/internalinterfaces" + v1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/listers/adminpolicybasedroute/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + watch "k8s.io/apimachinery/pkg/watch" + cache "k8s.io/client-go/tools/cache" +) + +// AdminPolicyBasedExternalRouteInformer provides access to a shared informer and lister for +// AdminPolicyBasedExternalRoutes. +type AdminPolicyBasedExternalRouteInformer interface { + Informer() cache.SharedIndexInformer + Lister() v1.AdminPolicyBasedExternalRouteLister +} + +type adminPolicyBasedExternalRouteInformer struct { + factory internalinterfaces.SharedInformerFactory + tweakListOptions internalinterfaces.TweakListOptionsFunc +} + +// NewAdminPolicyBasedExternalRouteInformer constructs a new informer for AdminPolicyBasedExternalRoute type. +// Always prefer using an informer factory to get a shared informer instead of getting an independent +// one. This reduces memory footprint and number of connections to the server. +func NewAdminPolicyBasedExternalRouteInformer(client versioned.Interface, resyncPeriod time.Duration, indexers cache.Indexers) cache.SharedIndexInformer { + return NewFilteredAdminPolicyBasedExternalRouteInformer(client, resyncPeriod, indexers, nil) +} + +// NewFilteredAdminPolicyBasedExternalRouteInformer constructs a new informer for AdminPolicyBasedExternalRoute type. +// Always prefer using an informer factory to get a shared informer instead of getting an independent +// one. This reduces memory footprint and number of connections to the server. 
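For the occasional case where a dedicated rather than shared informer is justified, a sketch of the filtered constructor defined below, narrowing the watch to a single object by name (gw-policy is illustrative):

    inf := NewFilteredAdminPolicyBasedExternalRouteInformer(
        client,           // versioned.Interface
        30*time.Second,   // resync period
        cache.Indexers{}, // no extra indexes
        func(opts *metav1.ListOptions) {
            // metadata.name is one of the few field selectors custom resources support.
            opts.FieldSelector = "metadata.name=gw-policy"
        },
    )

The constructor itself follows.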
+func NewFilteredAdminPolicyBasedExternalRouteInformer(client versioned.Interface, resyncPeriod time.Duration, indexers cache.Indexers, tweakListOptions internalinterfaces.TweakListOptionsFunc) cache.SharedIndexInformer { + return cache.NewSharedIndexInformer( + &cache.ListWatch{ + ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.K8sV1().AdminPolicyBasedExternalRoutes().List(context.TODO(), options) + }, + WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.K8sV1().AdminPolicyBasedExternalRoutes().Watch(context.TODO(), options) + }, + }, + &adminpolicybasedroutev1.AdminPolicyBasedExternalRoute{}, + resyncPeriod, + indexers, + ) +} + +func (f *adminPolicyBasedExternalRouteInformer) defaultInformer(client versioned.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer { + return NewFilteredAdminPolicyBasedExternalRouteInformer(client, resyncPeriod, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, f.tweakListOptions) +} + +func (f *adminPolicyBasedExternalRouteInformer) Informer() cache.SharedIndexInformer { + return f.factory.InformerFor(&adminpolicybasedroutev1.AdminPolicyBasedExternalRoute{}, f.defaultInformer) +} + +func (f *adminPolicyBasedExternalRouteInformer) Lister() v1.AdminPolicyBasedExternalRouteLister { + return v1.NewAdminPolicyBasedExternalRouteLister(f.Informer().GetIndexer()) +} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/adminpolicybasedroute/v1/interface.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/adminpolicybasedroute/v1/interface.go new file mode 100644 index 0000000000..bef8d47165 --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/adminpolicybasedroute/v1/interface.go @@ -0,0 +1,44 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by informer-gen. DO NOT EDIT. + +package v1 + +import ( + internalinterfaces "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/internalinterfaces" +) + +// Interface provides access to all the informers in this group version. +type Interface interface { + // AdminPolicyBasedExternalRoutes returns a AdminPolicyBasedExternalRouteInformer. + AdminPolicyBasedExternalRoutes() AdminPolicyBasedExternalRouteInformer +} + +type version struct { + factory internalinterfaces.SharedInformerFactory + namespace string + tweakListOptions internalinterfaces.TweakListOptionsFunc +} + +// New returns a new Interface. +func New(f internalinterfaces.SharedInformerFactory, namespace string, tweakListOptions internalinterfaces.TweakListOptionsFunc) Interface { + return &version{factory: f, namespace: namespace, tweakListOptions: tweakListOptions} +} + +// AdminPolicyBasedExternalRoutes returns a AdminPolicyBasedExternalRouteInformer. 
+func (v *version) AdminPolicyBasedExternalRoutes() AdminPolicyBasedExternalRouteInformer { + return &adminPolicyBasedExternalRouteInformer{factory: v.factory, tweakListOptions: v.tweakListOptions} +} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/factory.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/factory.go new file mode 100644 index 0000000000..b3332112ec --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/factory.go @@ -0,0 +1,179 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by informer-gen. DO NOT EDIT. + +package externalversions + +import ( + reflect "reflect" + sync "sync" + time "time" + + versioned "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned" + adminpolicybasedroute "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/adminpolicybasedroute" + internalinterfaces "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/internalinterfaces" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + schema "k8s.io/apimachinery/pkg/runtime/schema" + cache "k8s.io/client-go/tools/cache" +) + +// SharedInformerOption defines the functional option type for SharedInformerFactory. +type SharedInformerOption func(*sharedInformerFactory) *sharedInformerFactory + +type sharedInformerFactory struct { + client versioned.Interface + namespace string + tweakListOptions internalinterfaces.TweakListOptionsFunc + lock sync.Mutex + defaultResync time.Duration + customResync map[reflect.Type]time.Duration + + informers map[reflect.Type]cache.SharedIndexInformer + // startedInformers is used for tracking which informers have been started. + // This allows Start() to be called multiple times safely. + startedInformers map[reflect.Type]bool +} + +// WithCustomResyncConfig sets a custom resync period for the specified informer types. +func WithCustomResyncConfig(resyncConfig map[v1.Object]time.Duration) SharedInformerOption { + return func(factory *sharedInformerFactory) *sharedInformerFactory { + for k, v := range resyncConfig { + factory.customResync[reflect.TypeOf(k)] = v + } + return factory + } +} + +// WithTweakListOptions sets a custom filter on all listers of the configured SharedInformerFactory. +func WithTweakListOptions(tweakListOptions internalinterfaces.TweakListOptionsFunc) SharedInformerOption { + return func(factory *sharedInformerFactory) *sharedInformerFactory { + factory.tweakListOptions = tweakListOptions + return factory + } +} + +// WithNamespace limits the SharedInformerFactory to the specified namespace. 
+func WithNamespace(namespace string) SharedInformerOption { + return func(factory *sharedInformerFactory) *sharedInformerFactory { + factory.namespace = namespace + return factory + } +} + +// NewSharedInformerFactory constructs a new instance of sharedInformerFactory for all namespaces. +func NewSharedInformerFactory(client versioned.Interface, defaultResync time.Duration) SharedInformerFactory { + return NewSharedInformerFactoryWithOptions(client, defaultResync) +} + +// NewFilteredSharedInformerFactory constructs a new instance of sharedInformerFactory. +// Listers obtained via this SharedInformerFactory will be subject to the same filters +// as specified here. +// Deprecated: Please use NewSharedInformerFactoryWithOptions instead +func NewFilteredSharedInformerFactory(client versioned.Interface, defaultResync time.Duration, namespace string, tweakListOptions internalinterfaces.TweakListOptionsFunc) SharedInformerFactory { + return NewSharedInformerFactoryWithOptions(client, defaultResync, WithNamespace(namespace), WithTweakListOptions(tweakListOptions)) +} + +// NewSharedInformerFactoryWithOptions constructs a new instance of a SharedInformerFactory with additional options. +func NewSharedInformerFactoryWithOptions(client versioned.Interface, defaultResync time.Duration, options ...SharedInformerOption) SharedInformerFactory { + factory := &sharedInformerFactory{ + client: client, + namespace: v1.NamespaceAll, + defaultResync: defaultResync, + informers: make(map[reflect.Type]cache.SharedIndexInformer), + startedInformers: make(map[reflect.Type]bool), + customResync: make(map[reflect.Type]time.Duration), + } + + // Apply all options + for _, opt := range options { + factory = opt(factory) + } + + return factory +} + +// Start initializes all requested informers. +func (f *sharedInformerFactory) Start(stopCh <-chan struct{}) { + f.lock.Lock() + defer f.lock.Unlock() + + for informerType, informer := range f.informers { + if !f.startedInformers[informerType] { + go informer.Run(stopCh) + f.startedInformers[informerType] = true + } + } +} + +// WaitForCacheSync waits for all started informers' cache were synced. +func (f *sharedInformerFactory) WaitForCacheSync(stopCh <-chan struct{}) map[reflect.Type]bool { + informers := func() map[reflect.Type]cache.SharedIndexInformer { + f.lock.Lock() + defer f.lock.Unlock() + + informers := map[reflect.Type]cache.SharedIndexInformer{} + for informerType, informer := range f.informers { + if f.startedInformers[informerType] { + informers[informerType] = informer + } + } + return informers + }() + + res := map[reflect.Type]bool{} + for informType, informer := range informers { + res[informType] = cache.WaitForCacheSync(stopCh, informer.HasSynced) + } + return res +} + +// InternalInformerFor returns the SharedIndexInformer for obj using an internal +// client. +func (f *sharedInformerFactory) InformerFor(obj runtime.Object, newFunc internalinterfaces.NewInformerFunc) cache.SharedIndexInformer { + f.lock.Lock() + defer f.lock.Unlock() + + informerType := reflect.TypeOf(obj) + informer, exists := f.informers[informerType] + if exists { + return informer + } + + resyncPeriod, exists := f.customResync[informerType] + if !exists { + resyncPeriod = f.defaultResync + } + + informer = newFunc(f.client, resyncPeriod) + f.informers[informerType] = informer + + return informer +} + +// SharedInformerFactory provides shared informers for resources in all known +// API group versions. 
+type SharedInformerFactory interface { + internalinterfaces.SharedInformerFactory + ForResource(resource schema.GroupVersionResource) (GenericInformer, error) + WaitForCacheSync(stopCh <-chan struct{}) map[reflect.Type]bool + + K8s() adminpolicybasedroute.Interface +} + +func (f *sharedInformerFactory) K8s() adminpolicybasedroute.Interface { + return adminpolicybasedroute.New(f, f.namespace, f.tweakListOptions) +} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/generic.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/generic.go new file mode 100644 index 0000000000..f3473df83c --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/generic.go @@ -0,0 +1,61 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by informer-gen. DO NOT EDIT. + +package externalversions + +import ( + "fmt" + + v1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + schema "k8s.io/apimachinery/pkg/runtime/schema" + cache "k8s.io/client-go/tools/cache" +) + +// GenericInformer is type of SharedIndexInformer which will locate and delegate to other +// sharedInformers based on type +type GenericInformer interface { + Informer() cache.SharedIndexInformer + Lister() cache.GenericLister +} + +type genericInformer struct { + informer cache.SharedIndexInformer + resource schema.GroupResource +} + +// Informer returns the SharedIndexInformer. +func (f *genericInformer) Informer() cache.SharedIndexInformer { + return f.informer +} + +// Lister returns the GenericLister. +func (f *genericInformer) Lister() cache.GenericLister { + return cache.NewGenericLister(f.Informer().GetIndexer(), f.resource) +} + +// ForResource gives generic access to a shared informer of the matching type +// TODO extend this to unknown resources with a client pool +func (f *sharedInformerFactory) ForResource(resource schema.GroupVersionResource) (GenericInformer, error) { + switch resource { + // Group=k8s.ovn.org, Version=v1 + case v1.SchemeGroupVersion.WithResource("adminpolicybasedexternalroutes"): + return &genericInformer{resource: resource.GroupResource(), informer: f.K8s().V1().AdminPolicyBasedExternalRoutes().Informer()}, nil + + } + + return nil, fmt.Errorf("no informer found for %v", resource) +} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/internalinterfaces/factory_interfaces.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/internalinterfaces/factory_interfaces.go new file mode 100644 index 0000000000..1a2c758db5 --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/internalinterfaces/factory_interfaces.go @@ -0,0 +1,39 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by informer-gen. DO NOT EDIT. + +package internalinterfaces + +import ( + time "time" + + versioned "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + cache "k8s.io/client-go/tools/cache" +) + +// NewInformerFunc takes versioned.Interface and time.Duration to return a SharedIndexInformer. +type NewInformerFunc func(versioned.Interface, time.Duration) cache.SharedIndexInformer + +// SharedInformerFactory a small interface to allow for adding an informer without an import cycle +type SharedInformerFactory interface { + Start(stopCh <-chan struct{}) + InformerFor(obj runtime.Object, newFunc NewInformerFunc) cache.SharedIndexInformer +} + +// TweakListOptionsFunc is a function that transforms a v1.ListOptions. +type TweakListOptionsFunc func(*v1.ListOptions) diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/listers/adminpolicybasedroute/v1/adminpolicybasedexternalroute.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/listers/adminpolicybasedroute/v1/adminpolicybasedexternalroute.go new file mode 100644 index 0000000000..d126826372 --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/listers/adminpolicybasedroute/v1/adminpolicybasedexternalroute.go @@ -0,0 +1,67 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by lister-gen. DO NOT EDIT. + +package v1 + +import ( + v1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/client-go/tools/cache" +) + +// AdminPolicyBasedExternalRouteLister helps list AdminPolicyBasedExternalRoutes. +// All objects returned here must be treated as read-only. +type AdminPolicyBasedExternalRouteLister interface { + // List lists all AdminPolicyBasedExternalRoutes in the indexer. + // Objects returned here must be treated as read-only. + List(selector labels.Selector) (ret []*v1.AdminPolicyBasedExternalRoute, err error) + // Get retrieves the AdminPolicyBasedExternalRoute from the index for a given name. + // Objects returned here must be treated as read-only. + Get(name string) (*v1.AdminPolicyBasedExternalRoute, error) + AdminPolicyBasedExternalRouteListerExpansion +} + +// adminPolicyBasedExternalRouteLister implements the AdminPolicyBasedExternalRouteLister interface. +type adminPolicyBasedExternalRouteLister struct { + indexer cache.Indexer +} + +// NewAdminPolicyBasedExternalRouteLister returns a new AdminPolicyBasedExternalRouteLister. 
+func NewAdminPolicyBasedExternalRouteLister(indexer cache.Indexer) AdminPolicyBasedExternalRouteLister { + return &adminPolicyBasedExternalRouteLister{indexer: indexer} +} + +// List lists all AdminPolicyBasedExternalRoutes in the indexer. +func (s *adminPolicyBasedExternalRouteLister) List(selector labels.Selector) (ret []*v1.AdminPolicyBasedExternalRoute, err error) { + err = cache.ListAll(s.indexer, selector, func(m interface{}) { + ret = append(ret, m.(*v1.AdminPolicyBasedExternalRoute)) + }) + return ret, err +} + +// Get retrieves the AdminPolicyBasedExternalRoute from the index for a given name. +func (s *adminPolicyBasedExternalRouteLister) Get(name string) (*v1.AdminPolicyBasedExternalRoute, error) { + obj, exists, err := s.indexer.GetByKey(name) + if err != nil { + return nil, err + } + if !exists { + return nil, errors.NewNotFound(v1.Resource("adminpolicybasedexternalroute"), name) + } + return obj.(*v1.AdminPolicyBasedExternalRoute), nil +} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/listers/adminpolicybasedroute/v1/expansion_generated.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/listers/adminpolicybasedroute/v1/expansion_generated.go new file mode 100644 index 0000000000..203e145172 --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/listers/adminpolicybasedroute/v1/expansion_generated.go @@ -0,0 +1,22 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by lister-gen. DO NOT EDIT. + +package v1 + +// AdminPolicyBasedExternalRouteListerExpansion allows custom methods to be added to +// AdminPolicyBasedExternalRouteLister. +type AdminPolicyBasedExternalRouteListerExpansion interface{} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/doc.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/doc.go new file mode 100644 index 0000000000..7b121f971b --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/doc.go @@ -0,0 +1,4 @@ +// Package v1 contains API Schema definitions for the network v1 API group +// +k8s:deepcopy-gen=package,register +// +groupName=k8s.ovn.org +package v1 diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/register.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/register.go new file mode 100644 index 0000000000..876b6e355c --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/register.go @@ -0,0 +1,29 @@ +package v1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +var ( + GroupName = "k8s.ovn.org" + SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: "v1"} + SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes) + AddToScheme = SchemeBuilder.AddToScheme +) + +// Resource takes an unqualified resource and returns a Group qualified GroupResource +func Resource(resource string) schema.GroupResource { + return SchemeGroupVersion.WithResource(resource).GroupResource() +} + +// Adds the list of known types to api.Scheme. 
+func addKnownTypes(scheme *runtime.Scheme) error { + scheme.AddKnownTypes(SchemeGroupVersion, + &AdminPolicyBasedExternalRoute{}, + &AdminPolicyBasedExternalRouteList{}, + ) + metav1.AddToGroupVersion(scheme, SchemeGroupVersion) + return nil +} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/types.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/types.go new file mode 100644 index 0000000000..d9eac0c3db --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/types.go @@ -0,0 +1,166 @@ +/* +Copyright 2023. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// +genclient +// +genclient:nonNamespaced +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +// +kubebuilder:resource:path=adminpolicybasedexternalroutes,scope=Cluster,shortName=apbexternalroute,singular=adminpolicybasedexternalroute +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +kubebuilder:printcolumn:name="Last Update",type="date",JSONPath=`.status.lastTransitionTime` +// +kubebuilder:printcolumn:name="Status",type="string",JSONPath=`.status.status` +// AdminPolicyBasedExternalRoute is a CRD allowing the cluster administrators to configure policies for external gateway IPs to be applied to all the pods contained in selected namespaces. +// Egress traffic from the pods that belong to the selected namespaces to outside the cluster is routed through these external gateway IPs. +type AdminPolicyBasedExternalRoute struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + // +kubebuilder:validation:Required + // +required + Spec AdminPolicyBasedExternalRouteSpec `json:"spec"` + // +optional + Status AdminPolicyBasedRouteStatus `json:"status,omitempty"` +} + +// AdminPolicyBasedExternalRouteSpec defines the desired state of AdminPolicyBasedExternalRoute +type AdminPolicyBasedExternalRouteSpec struct { + // From defines the selectors that will determine the target namespaces to this CR. + From ExternalNetworkSource `json:"from"` + // NextHops defines two types of hops: Static and Dynamic. Each hop defines at least one external gateway IP. + NextHops ExternalNextHops `json:"nextHops"` +} + +// ExternalNetworkSource contains the selectors used to determine the namespaces where the policy will be applied +type ExternalNetworkSource struct { + // NamespaceSelector defines a selector to be used to determine which namespaces will be targeted by this CR + NamespaceSelector metav1.LabelSelector `json:"namespaceSelector"` +} + +// +kubebuilder:validation:MinProperties:=1 +// ExternalNextHops contains slices of StaticHops and DynamicHops structures. Minimum is one StaticHop or one DynamicHop. +type ExternalNextHops struct { + // StaticHops defines a slice of StaticHop. This field is optional. + StaticHops []*StaticHop `json:"static,omitempty"` + // DynamicHops defines a slice of DynamicHop. This field is optional.
+ DynamicHops []*DynamicHop `json:"dynamic,omitempty"` +} + +// StaticHop defines the configuration of a static IP that acts as an external Gateway Interface. IP field is mandatory. +type StaticHop struct { + // IP defines the static IP to be used for egress traffic. The IP can be either IPv4 or IPv6. + // +kubebuilder:validation:Required + // +required + IP string `json:"ip"` + // BFDEnabled determines if the interface implements the Bidirectional Forward Detection protocol. Defaults to false. + // +optional + // +kubebuilder:default:=false + // +default=false + BFDEnabled bool `json:"bfdEnabled,omitempty"` + // SkipHostSNAT determines whether to disable Source NAT to the host IP. Defaults to false. + // +optional + // +kubebuilder:default:=false + // +default=false + // SkipHostSNAT bool `json:"skipHostSNAT,omitempty"` +} + +// DynamicHop defines the configuration for a dynamic external gateway interface. +// These interfaces are wrapped around a pod object that resides inside the cluster. +// The field NetworkAttachmentName captures the name of the multus network to use when retrieving the gateway IP. +// The PodSelector field is mandatory, while the NamespaceSelector is optional. +type DynamicHop struct { + // PodSelector defines the selector to filter the pods that are external gateways. + // +kubebuilder:validation:Required + // +required + PodSelector metav1.LabelSelector `json:"podSelector"` + // NamespaceSelector defines a selector to filter the namespaces where the pod gateways are located. + // +kubebuilder:validation:Optional + // +optional + NamespaceSelector *metav1.LabelSelector `json:"namespaceSelector"` + // NetworkAttachmentName determines the multus network name to use when retrieving the pod IPs that will be used as the gateway IP. + // When this field is empty, the logic assumes that the pod is configured with HostNetwork and is using the node's IP as gateway. + // +optional + // +kubebuilder:default="" + // +default="" + NetworkAttachmentName string `json:"networkAttachmentName,omitempty"` + // BFDEnabled determines if the interface implements the Bidirectional Forward Detection protocol. Defaults to false. + // +optional + // +kubebuilder:default:=false + // +default=false + BFDEnabled bool `json:"bfdEnabled,omitempty"` + // SkipHostSNAT determines whether to disable Source NAT to the host IP. Defaults to false. + // +optional + // +kubebuilder:default:=false + // +default=false + // SkipHostSNAT bool `json:"skipHostSNAT,omitempty"` +} + +// +kubebuilder:object:root=true +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +// AdminPolicyBasedExternalRouteList contains a list of AdminPolicyBasedExternalRoutes +type AdminPolicyBasedExternalRouteList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []AdminPolicyBasedExternalRoute `json:"items"` +} + +// AdminPolicyBasedRouteStatus contains the observed status of the AdminPolicyBased route types. +type AdminPolicyBasedRouteStatus struct { + // Captures the time when the last change was applied. + LastTransitionTime metav1.Time `json:"lastTransitionTime"` + // An array of Human-readable messages indicating details about the status of the object. + Messages []string `json:"messages"` + // A concise indication of whether the AdminPolicyBasedRoute resource is applied with success + Status StatusType `json:"status"` +} + +// StatusType defines the types of status used in the Status field.
The value determines if the +// deployment of the CR was successful or if it failed. +type StatusType string + +const ( + SuccessStatus StatusType = "Success" + FailStatus StatusType = "Fail" +) + +// // +genclient +// // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +// // +kubebuilder:resource:path=adminpolicybasedexternalroute,scope=Cluster +// // +kubebuilder:object:root=true +// // +kubebuilder:subresource:status +// type AdminPolicyBasedInternalRoute struct { +// metav1.TypeMeta `json:",inline"` +// metav1.ObjectMeta `json:"metadata,omitempty"` +// Spec AdminPolicyBasedInternalRouteSpec `json:"spec,omitempty"` +// Status AdminPolicyBasedRouteStatus `json:"status,omitempty"` +// } + +// // AdminPolicyBasedInternalRouteSpec defines the desired state of AdminPolicyBasedInternalRoute +// type AdminPolicyBasedInternalRouteSpec struct { +// } + +// // +kubebuilder:object:root=true +// // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +// // AdminPolicyBasedInternalRouteList contains a list of AdminPolicyBasedInternalRoutes +// type AdminPolicyBasedInternalRouteList struct { +// metav1.TypeMeta `json:",inline"` +// metav1.ListMeta `json:"metadata,omitempty"` +// Items []AdminPolicyBasedInternalRoute `json:"items"` +// } diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/zz_generated.deepcopy.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/zz_generated.deepcopy.go new file mode 100644 index 0000000000..e6d4c308cc --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/zz_generated.deepcopy.go @@ -0,0 +1,220 @@ +//go:build !ignore_autogenerated +// +build !ignore_autogenerated + +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by deepcopy-gen. DO NOT EDIT. + +package v1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" +) + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *AdminPolicyBasedExternalRoute) DeepCopyInto(out *AdminPolicyBasedExternalRoute) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AdminPolicyBasedExternalRoute. +func (in *AdminPolicyBasedExternalRoute) DeepCopy() *AdminPolicyBasedExternalRoute { + if in == nil { + return nil + } + out := new(AdminPolicyBasedExternalRoute) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *AdminPolicyBasedExternalRoute) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *AdminPolicyBasedExternalRouteList) DeepCopyInto(out *AdminPolicyBasedExternalRouteList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]AdminPolicyBasedExternalRoute, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AdminPolicyBasedExternalRouteList. +func (in *AdminPolicyBasedExternalRouteList) DeepCopy() *AdminPolicyBasedExternalRouteList { + if in == nil { + return nil + } + out := new(AdminPolicyBasedExternalRouteList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *AdminPolicyBasedExternalRouteList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *AdminPolicyBasedExternalRouteSpec) DeepCopyInto(out *AdminPolicyBasedExternalRouteSpec) { + *out = *in + in.From.DeepCopyInto(&out.From) + in.NextHops.DeepCopyInto(&out.NextHops) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AdminPolicyBasedExternalRouteSpec. +func (in *AdminPolicyBasedExternalRouteSpec) DeepCopy() *AdminPolicyBasedExternalRouteSpec { + if in == nil { + return nil + } + out := new(AdminPolicyBasedExternalRouteSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *AdminPolicyBasedRouteStatus) DeepCopyInto(out *AdminPolicyBasedRouteStatus) { + *out = *in + in.LastTransitionTime.DeepCopyInto(&out.LastTransitionTime) + if in.Messages != nil { + in, out := &in.Messages, &out.Messages + *out = make([]string, len(*in)) + copy(*out, *in) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AdminPolicyBasedRouteStatus. +func (in *AdminPolicyBasedRouteStatus) DeepCopy() *AdminPolicyBasedRouteStatus { + if in == nil { + return nil + } + out := new(AdminPolicyBasedRouteStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DynamicHop) DeepCopyInto(out *DynamicHop) { + *out = *in + in.PodSelector.DeepCopyInto(&out.PodSelector) + if in.NamespaceSelector != nil { + in, out := &in.NamespaceSelector, &out.NamespaceSelector + *out = new(metav1.LabelSelector) + (*in).DeepCopyInto(*out) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DynamicHop. +func (in *DynamicHop) DeepCopy() *DynamicHop { + if in == nil { + return nil + } + out := new(DynamicHop) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ExternalNetworkSource) DeepCopyInto(out *ExternalNetworkSource) { + *out = *in + in.NamespaceSelector.DeepCopyInto(&out.NamespaceSelector) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExternalNetworkSource. 
+func (in *ExternalNetworkSource) DeepCopy() *ExternalNetworkSource { + if in == nil { + return nil + } + out := new(ExternalNetworkSource) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ExternalNextHops) DeepCopyInto(out *ExternalNextHops) { + *out = *in + if in.StaticHops != nil { + in, out := &in.StaticHops, &out.StaticHops + *out = make([]*StaticHop, len(*in)) + for i := range *in { + if (*in)[i] != nil { + in, out := &(*in)[i], &(*out)[i] + *out = new(StaticHop) + **out = **in + } + } + } + if in.DynamicHops != nil { + in, out := &in.DynamicHops, &out.DynamicHops + *out = make([]*DynamicHop, len(*in)) + for i := range *in { + if (*in)[i] != nil { + in, out := &(*in)[i], &(*out)[i] + *out = new(DynamicHop) + (*in).DeepCopyInto(*out) + } + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExternalNextHops. +func (in *ExternalNextHops) DeepCopy() *ExternalNextHops { + if in == nil { + return nil + } + out := new(ExternalNextHops) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *StaticHop) DeepCopyInto(out *StaticHop) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StaticHop. +func (in *StaticHop) DeepCopy() *StaticHop { + if in == nil { + return nil + } + out := new(StaticHop) + in.DeepCopyInto(out) + return out +} From 3914cad03f75dbdb0494e9ceac4359d8eba0870a Mon Sep 17 00:00:00 2001 From: jordigilh Date: Sat, 8 Apr 2023 10:51:27 -0400 Subject: [PATCH 68/90] Summary of changes: * Implements controllers for Admin Policy Based External Route to handle changes to namespaces, pods and admin policy based external route CRs. * Initializes in the master node to handle interactions with the north bound DB. Initializes in worker nodes to handle changes to the conntrack (deletes ECMP entries when a gateway IP is no longer a valid external gateway IP). * Implements the repair() function for the master node. * Integrates with the annotation logic to avoid duplication in the cache by sharing the externalGWCache and EXGWCacheMutex objects between the annotation and controller logic. * Updates the annotation logic to ensure the namespace annotation k8s.ovn.org/external-gw-pod-ips is updated when changes occur in a CR instance that coexists in the same namespace and that can impact the list of dynamic gateway IPs. * The implementation no longer relies on namespace annotations, including "k8s.ovn.org/external-gw-pod-ips"; instead it uses its own cache structure to identify the valid pod IPs for a given namespace. * Implements E2E tests for admin policy based external route. The tests duplicate the existing annotation-based external gateway logic, using the CR instead.
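For illustration, a minimal consumer of the new CRD could look like the sketch below. It is only a sketch: the NewForConfig constructor, the Create signature, and the InClusterConfig call follow the standard client-gen/client-go patterns and are assumed here rather than taken from this series; the K8sV1() accessor and the v1 types are the ones added by these patches, and the policy name, label, and gateway IP are hypothetical.

    package main

    import (
    	"context"

    	adminpolicybasedroutev1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1"
    	versioned "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned"
    	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    	"k8s.io/client-go/rest"
    )

    func main() {
    	// Assumption: running in-cluster; any rest.Config would do.
    	cfg, err := rest.InClusterConfig()
    	if err != nil {
    		panic(err)
    	}
    	// Assumption: client-gen emits the usual NewForConfig for the versioned clientset.
    	client, err := versioned.NewForConfig(cfg)
    	if err != nil {
    		panic(err)
    	}
    	// Route egress traffic of namespaces labeled multiple_gws=true through a
    	// single static, BFD-monitored gateway IP (name, label and IP are illustrative).
    	policy := &adminpolicybasedroutev1.AdminPolicyBasedExternalRoute{
    		ObjectMeta: metav1.ObjectMeta{Name: "example-static-route"},
    		Spec: adminpolicybasedroutev1.AdminPolicyBasedExternalRouteSpec{
    			From: adminpolicybasedroutev1.ExternalNetworkSource{
    				NamespaceSelector: metav1.LabelSelector{
    					MatchLabels: map[string]string{"multiple_gws": "true"},
    				},
    			},
    			NextHops: adminpolicybasedroutev1.ExternalNextHops{
    				StaticHops: []*adminpolicybasedroutev1.StaticHop{
    					{IP: "172.18.0.8", BFDEnabled: true},
    				},
    			},
    		},
    	}
    	// Assumption: standard client-gen Create signature on the typed client.
    	if _, err := client.K8sV1().AdminPolicyBasedExternalRoutes().Create(context.TODO(), policy, metav1.CreateOptions{}); err != nil {
    		panic(err)
    	}
    }

Since the CRD is cluster scoped, no namespace is passed to the typed client; once the CR is created, egress traffic from pods in the selected namespaces would be routed through the configured hop, with BFD monitoring its liveness, mirroring what the annotation-based external gateway flow provided.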
Signed-off-by: jordigilh --- README.md | 2 + contrib/kind.sh | 1 + dist/images/daemonset.sh | 1 + ...org_adminpolicybasedexternalroutes.yaml.j2 | 289 ++ dist/templates/ovn-setup.yaml.j2 | 6 + go-controller/pkg/factory/factory.go | 12 +- .../pkg/factory/mocks/NodeWatchFactory.go | 34 + go-controller/pkg/factory/types.go | 4 + go-controller/pkg/kube/kube.go | 2 + go-controller/pkg/kube/mocks/Interface.go | 153 +- go-controller/pkg/libovsdbops/router.go | 18 + .../network_controller_manager.go | 1 + .../node_network_controller_manager.go | 27 +- .../base_node_network_controller_dpu_test.go | 4 +- .../node/default_node_network_controller.go | 60 +- .../pkg/node/gateway_init_linux_test.go | 6 +- go-controller/pkg/node/ovn_test.go | 8 +- .../apbroute/apbroute_suite_test.go | 13 + .../apbroute/external_controller.go | 372 ++ .../apbroute/external_controller_namespace.go | 137 + .../external_controller_namespace_test.go | 358 ++ .../apbroute/external_controller_pod.go | 436 +++ .../apbroute/external_controller_pod_test.go | 500 +++ .../apbroute/external_controller_policy.go | 722 ++++ .../external_controller_policy_test.go | 723 ++++ .../controller/apbroute/master_controller.go | 568 +++ .../ovn/controller/apbroute/network_client.go | 744 ++++ .../controller/apbroute/node_controller.go | 496 +++ .../pkg/ovn/controller/apbroute/repair.go | 367 ++ .../pkg/ovn/default_network_controller.go | 37 +- go-controller/pkg/ovn/egressgw.go | 348 +- .../pkg/ovn/external_gateway_test.go | 2891 +++++++++++++++ go-controller/pkg/ovn/ovn_test.go | 33 +- go-controller/pkg/util/kube.go | 20 +- go-controller/pkg/util/net_linux.go | 1 - test/e2e/e2e.go | 11 +- test/e2e/external_gateways.go | 3084 ++++++++++++----- 37 files changed, 11258 insertions(+), 1231 deletions(-) create mode 100644 dist/templates/k8s.ovn.org_adminpolicybasedexternalroutes.yaml.j2 create mode 100644 go-controller/pkg/ovn/controller/apbroute/apbroute_suite_test.go create mode 100644 go-controller/pkg/ovn/controller/apbroute/external_controller.go create mode 100644 go-controller/pkg/ovn/controller/apbroute/external_controller_namespace.go create mode 100644 go-controller/pkg/ovn/controller/apbroute/external_controller_namespace_test.go create mode 100644 go-controller/pkg/ovn/controller/apbroute/external_controller_pod.go create mode 100644 go-controller/pkg/ovn/controller/apbroute/external_controller_pod_test.go create mode 100644 go-controller/pkg/ovn/controller/apbroute/external_controller_policy.go create mode 100644 go-controller/pkg/ovn/controller/apbroute/external_controller_policy_test.go create mode 100644 go-controller/pkg/ovn/controller/apbroute/master_controller.go create mode 100644 go-controller/pkg/ovn/controller/apbroute/network_client.go create mode 100644 go-controller/pkg/ovn/controller/apbroute/node_controller.go create mode 100644 go-controller/pkg/ovn/controller/apbroute/repair.go create mode 100644 go-controller/pkg/ovn/external_gateway_test.go diff --git a/README.md b/README.md index f424a62ffd..ec1354bb25 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,8 @@ kubectl create -f $HOME/work/src/github.com/ovn-org/ovn-kubernetes/dist/yaml/ovn kubectl create -f $HOME/work/src/github.com/ovn-org/ovn-kubernetes/dist/yaml/k8s.ovn.org_egressips.yaml # create egressfirewalls.k8s.ovn.org CRD kubectl create -f $HOME/work/src/github.com/ovn-org/ovn-kubernetes/dist/yaml/k8s.ovn.org_egressfirewalls.yaml +# create adminpolicybasedexternalroute.k8s.ovn.org CRD +kubectl create -f 
$HOME/work/src/github.com/ovn-org/ovn-kubernetes/dist/yaml/k8s.ovn.org_adminpolicybasedexternalroutes.yaml # Run ovnkube-db deployment. kubectl create -f $HOME/work/src/github.com/ovn-org/ovn-kubernetes/dist/yaml/ovnkube-db.yaml diff --git a/contrib/kind.sh b/contrib/kind.sh index ed2d5b2d32..9a9060a5bf 100755 --- a/contrib/kind.sh +++ b/contrib/kind.sh @@ -836,6 +836,7 @@ install_ovn() { run_kubectl apply -f k8s.ovn.org_egressips.yaml run_kubectl apply -f k8s.ovn.org_egressqoses.yaml run_kubectl apply -f k8s.ovn.org_egressservices.yaml + run_kubectl apply -f k8s.ovn.org_adminpolicybasedexternalroutes.yaml run_kubectl apply -f ovn-setup.yaml MASTER_NODES=$(kind get nodes --name "${KIND_CLUSTER_NAME}" | sort | head -n "${KIND_NUM_MASTER}") # We want OVN HA not Kubernetes HA diff --git a/dist/images/daemonset.sh b/dist/images/daemonset.sh index a6a1f7894f..5711ee3017 100755 --- a/dist/images/daemonset.sh +++ b/dist/images/daemonset.sh @@ -649,5 +649,6 @@ cp ../templates/k8s.ovn.org_egressfirewalls.yaml.j2 ${output_dir}/k8s.ovn.org_eg cp ../templates/k8s.ovn.org_egressips.yaml.j2 ${output_dir}/k8s.ovn.org_egressips.yaml cp ../templates/k8s.ovn.org_egressqoses.yaml.j2 ${output_dir}/k8s.ovn.org_egressqoses.yaml cp ../templates/k8s.ovn.org_egressservices.yaml.j2 ${output_dir}/k8s.ovn.org_egressservices.yaml +cp ../templates/k8s.ovn.org_adminpolicybasedexternalroutes.yaml.j2 ${output_dir}/k8s.ovn.org_adminpolicybasedexternalroutes.yaml exit 0 diff --git a/dist/templates/k8s.ovn.org_adminpolicybasedexternalroutes.yaml.j2 b/dist/templates/k8s.ovn.org_adminpolicybasedexternalroutes.yaml.j2 new file mode 100644 index 0000000000..a0eb26a0a8 --- /dev/null +++ b/dist/templates/k8s.ovn.org_adminpolicybasedexternalroutes.yaml.j2 @@ -0,0 +1,289 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.10.0 + creationTimestamp: null + name: adminpolicybasedexternalroutes.k8s.ovn.org +spec: + group: k8s.ovn.org + names: + kind: AdminPolicyBasedExternalRoute + listKind: AdminPolicyBasedExternalRouteList + plural: adminpolicybasedexternalroutes + shortNames: + - apbexternalroute + singular: adminpolicybasedexternalroute + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .status.lastTransitionTime + name: Last Update + type: date + - jsonPath: .status.status + name: Status + type: string + name: v1 + schema: + openAPIV3Schema: + description: AdminPolicyBasedExternalRoute is a CRD allowing the cluster administrators + to configure policies for external gateway IPs to be applied to all the + pods contained in selected namespaces. Egress traffic from the pods that + belong to the selected namespaces to outside the cluster is routed through + these external gateway IPs. + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: AdminPolicyBasedExternalRouteSpec defines the desired state + of AdminPolicyBasedExternalRoute + properties: + from: + description: From defines the selectors that will determine the target + namespaces to this CR. + properties: + namespaceSelector: + description: NamespaceSelector defines a selector to be used to + determine which namespaces will be targeted by this CR + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector + that contains values, a key, and an operator that relates + the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: operator represents a key's relationship + to a set of values. Valid operators are In, NotIn, + Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. If + the operator is In or NotIn, the values array must + be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced + during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A + single {key,value} in the matchLabels map is equivalent + to an element of matchExpressions, whose key field is "key", + the operator is "In", and the values array contains only + "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + required: + - namespaceSelector + type: object + nextHops: + description: 'NextHops defines two types of hops: Static and Dynamic. + Each hop defines at least one external gateway IP.' + minProperties: 1 + properties: + dynamic: + description: DynamicHops defines a slice of DynamicHop. This + field is optional. + items: + description: DynamicHop defines the configuration for a dynamic + external gateway interface. These interfaces are wrapped around + a pod object that resides inside the cluster. The field NetworkAttachmentName + captures the name of the multus network to use when retrieving + the gateway IP. The PodSelector field is mandatory, while the + NamespaceSelector is optional. + properties: + bfdEnabled: + default: false + description: BFDEnabled determines if the interface implements + the Bidirectional Forward Detection protocol. Defaults + to false. + type: boolean + namespaceSelector: + description: NamespaceSelector defines a selector to filter + the namespaces where the pod gateways are located. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: operator represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values.
+ If the operator is In or NotIn, the values array + must be non-empty. If the operator is Exists + or DoesNotExist, the values array must be empty. + This array is replaced during a strategic merge + patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. + A single {key,value} in the matchLabels map is equivalent + to an element of matchExpressions, whose key field + is "key", the operator is "In", and the values array + contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + networkAttachmentName: + default: "" + description: NetworkAttachmentName determines the multus + network name to use when retrieving the pod IPs that will + be used as the gateway IP. When this field is empty, the + logic assumes that the pod is configured with HostNetwork + and is using the node's IP as gateway. + type: string + podSelector: + description: PodSelector defines the selector to filter + the pods that are external gateways. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: operator represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. + If the operator is In or NotIn, the values array + must be non-empty. If the operator is Exists + or DoesNotExist, the values array must be empty. + This array is replaced during a strategic merge + patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. + A single {key,value} in the matchLabels map is equivalent + to an element of matchExpressions, whose key field + is "key", the operator is "In", and the values array + contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + required: + - podSelector + type: object + type: array + static: + description: StaticHops defines a slice of StaticHop. This field + is optional. + items: + description: StaticHop defines the configuration of a static + IP that acts as an external Gateway Interface. IP field is + mandatory. + properties: + bfdEnabled: + default: false + description: BFDEnabled determines if the interface implements + the Bidirectional Forward Detection protocol. Defaults + to false. + type: boolean + ip: + description: IP defines the static IP to be used for egress + traffic. The IP can be either IPv4 or IPv6. + type: string + required: + - ip + type: object + type: array + type: object + required: + - from + - nextHops + type: object + status: + description: AdminPolicyBasedRouteStatus contains the observed status + of the AdminPolicyBased route types. + properties: + lastTransitionTime: + description: Captures the time when the last change was applied. 
+ format: date-time + type: string + messages: + description: An array of Human-readable messages indicating details + about the status of the object. + items: + type: string + type: array + status: + description: A concise indication of whether the AdminPolicyBasedRoute + resource is applied with success + type: string + required: + - lastTransitionTime + - messages + - status + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + status: {} diff --git a/dist/templates/ovn-setup.yaml.j2 b/dist/templates/ovn-setup.yaml.j2 index 1319ee5b4b..041012a073 100644 --- a/dist/templates/ovn-setup.yaml.j2 +++ b/dist/templates/ovn-setup.yaml.j2 @@ -87,7 +87,13 @@ rules: - egressqoses - egressservices - egressservices/status + - adminpolicybasedexternalroutes verbs: ["list", "get", "watch", "update", "patch"] +- apiGroups: + - k8s.ovn.org + resources: + - adminpolicybasedexternalroutes/status + verbs: [ "update"] - apiGroups: - apiextensions.k8s.io resources: diff --git a/go-controller/pkg/factory/factory.go b/go-controller/pkg/factory/factory.go index 04472d3c2f..8575a25b70 100644 --- a/go-controller/pkg/factory/factory.go +++ b/go-controller/pkg/factory/factory.go @@ -354,6 +354,13 @@ func NewNodeWatchFactory(ovnClientset *util.OVNNodeClientset, nodeName string) ( return nil, err } + var err error + wf.informers[PodType], err = newQueuedInformer(PodType, wf.iFactory.Core().V1().Pods().Informer(), wf.stopChan, + defaultNumEventQueues) + if err != nil { + return nil, err + } + // For Services and Endpoints, pre-populate the shared Informer with one that // has a label selector excluding headless services. wf.iFactory.InformerFor(&kapi.Service{}, func(c kubernetes.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer { @@ -394,7 +401,6 @@ func NewNodeWatchFactory(ovnClientset *util.OVNNodeClientset, nodeName string) ( withServiceNameAndNoHeadlessServiceSelector()) }) - var err error wf.informers[NamespaceType], err = newInformer(NamespaceType, wf.iFactory.Core().V1().Namespaces().Informer()) if err != nil { return nil, err @@ -983,8 +989,8 @@ func (wf *WatchFactory) PodCoreInformer() v1coreinformers.PodInformer { return wf.iFactory.Core().V1().Pods() } -func (wf *WatchFactory) NamespaceInformer() cache.SharedIndexInformer { - return wf.informers[NamespaceType].inf +func (wf *WatchFactory) NamespaceInformer() v1coreinformers.NamespaceInformer { + return wf.iFactory.Core().V1().Namespaces() } func (wf *WatchFactory) ServiceInformer() cache.SharedIndexInformer { diff --git a/go-controller/pkg/factory/mocks/NodeWatchFactory.go b/go-controller/pkg/factory/mocks/NodeWatchFactory.go index 67719f0540..8b2fe1629f 100644 --- a/go-controller/pkg/factory/mocks/NodeWatchFactory.go +++ b/go-controller/pkg/factory/mocks/NodeWatchFactory.go @@ -8,6 +8,8 @@ import ( factory "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" + informerscorev1 "k8s.io/client-go/informers/core/v1" + labels "k8s.io/apimachinery/pkg/labels" mock "github.com/stretchr/testify/mock" @@ -381,6 +383,22 @@ func (_m *NodeWatchFactory) LocalPodInformer() cache.SharedIndexInformer { return r0 } +// NamespaceInformer provides a mock function with given fields: +func (_m *NodeWatchFactory) NamespaceInformer() informerscorev1.NamespaceInformer { + ret := _m.Called() + + var r0 informerscorev1.NamespaceInformer + if rf, ok := ret.Get(0).(func() informerscorev1.NamespaceInformer); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(informerscorev1.NamespaceInformer) + 
} + } + + return r0 +} + // NodeInformer provides a mock function with given fields: func (_m *NodeWatchFactory) NodeInformer() cache.SharedIndexInformer { ret := _m.Called() @@ -397,6 +415,22 @@ func (_m *NodeWatchFactory) NodeInformer() cache.SharedIndexInformer { return r0 } +// PodCoreInformer provides a mock function with given fields: +func (_m *NodeWatchFactory) PodCoreInformer() informerscorev1.PodInformer { + ret := _m.Called() + + var r0 informerscorev1.PodInformer + if rf, ok := ret.Get(0).(func() informerscorev1.PodInformer); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(informerscorev1.PodInformer) + } + } + + return r0 +} + // RemoveEndpointSliceHandler provides a mock function with given fields: handler func (_m *NodeWatchFactory) RemoveEndpointSliceHandler(handler *factory.Handler) { _m.Called(handler) diff --git a/go-controller/pkg/factory/types.go b/go-controller/pkg/factory/types.go index b4e2baaa5f..a80a120fef 100644 --- a/go-controller/pkg/factory/types.go +++ b/go-controller/pkg/factory/types.go @@ -4,6 +4,8 @@ import ( kapi "k8s.io/api/core/v1" discovery "k8s.io/api/discovery/v1" "k8s.io/apimachinery/pkg/labels" + coreinformers "k8s.io/client-go/informers/core/v1" + v1coreinformers "k8s.io/client-go/informers/core/v1" "k8s.io/client-go/tools/cache" ) @@ -46,6 +48,8 @@ type NodeWatchFactory interface { NodeInformer() cache.SharedIndexInformer LocalPodInformer() cache.SharedIndexInformer + NamespaceInformer() coreinformers.NamespaceInformer + PodCoreInformer() v1coreinformers.PodInformer GetPods(namespace string) ([]*kapi.Pod, error) GetPod(namespace, name string) (*kapi.Pod, error) diff --git a/go-controller/pkg/kube/kube.go b/go-controller/pkg/kube/kube.go index c790df4b9b..681e012044 100644 --- a/go-controller/pkg/kube/kube.go +++ b/go-controller/pkg/kube/kube.go @@ -6,6 +6,7 @@ import ( ocpcloudnetworkapi "github.com/openshift/api/cloudnetwork/v1" ocpcloudnetworkclientset "github.com/openshift/client-go/cloudnetwork/clientset/versioned" + adminpolicybasedrouteclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned" egressfirewall "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1" egressfirewallclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1/apis/clientset/versioned" egressipv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1" @@ -73,6 +74,7 @@ type KubeOVN struct { EgressFirewallClient egressfirewallclientset.Interface CloudNetworkClient ocpcloudnetworkclientset.Interface EgressServiceClient egressserviceclientset.Interface + APBRouteClient adminpolicybasedrouteclientset.Interface } // SetAnnotationsOnPod takes the pod object and map of key/value string pairs to set as annotations diff --git a/go-controller/pkg/kube/mocks/Interface.go b/go-controller/pkg/kube/mocks/Interface.go index f54b087e2b..b9fa0ab582 100644 --- a/go-controller/pkg/kube/mocks/Interface.go +++ b/go-controller/pkg/kube/mocks/Interface.go @@ -3,13 +3,16 @@ package mocks import ( - corev1 "k8s.io/api/core/v1" + adminpolicybasedroutev1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + apicorev1 "k8s.io/api/core/v1" + + corev1 "k8s.io/client-go/kubernetes/typed/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" mock "github.com/stretchr/testify/mock" - v1 "k8s.io/client-go/kubernetes/typed/core/v1" + v1 "github.com/openshift/api/cloudnetwork/v1" ) // Interface is an autogenerated mock type for 
the Interface type @@ -17,16 +20,53 @@ type Interface struct { mock.Mock } +// CreateCloudPrivateIPConfig provides a mock function with given fields: cloudPrivateIPConfig +func (_m *Interface) CreateCloudPrivateIPConfig(cloudPrivateIPConfig *v1.CloudPrivateIPConfig) (*v1.CloudPrivateIPConfig, error) { + ret := _m.Called(cloudPrivateIPConfig) + + var r0 *v1.CloudPrivateIPConfig + if rf, ok := ret.Get(0).(func(*v1.CloudPrivateIPConfig) *v1.CloudPrivateIPConfig); ok { + r0 = rf(cloudPrivateIPConfig) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*v1.CloudPrivateIPConfig) + } + } + + var r1 error + if rf, ok := ret.Get(1).(func(*v1.CloudPrivateIPConfig) error); ok { + r1 = rf(cloudPrivateIPConfig) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// DeleteCloudPrivateIPConfig provides a mock function with given fields: name +func (_m *Interface) DeleteCloudPrivateIPConfig(name string) error { + ret := _m.Called(name) + + var r0 error + if rf, ok := ret.Get(0).(func(string) error); ok { + r0 = rf(name) + } else { + r0 = ret.Error(0) + } + + return r0 +} + // Events provides a mock function with given fields: -func (_m *Interface) Events() v1.EventInterface { +func (_m *Interface) Events() corev1.EventInterface { ret := _m.Called() - var r0 v1.EventInterface - if rf, ok := ret.Get(0).(func() v1.EventInterface); ok { + var r0 corev1.EventInterface + if rf, ok := ret.Get(0).(func() corev1.EventInterface); ok { r0 = rf() } else { if ret.Get(0) != nil { - r0 = ret.Get(0).(v1.EventInterface) + r0 = ret.Get(0).(corev1.EventInterface) } } @@ -57,15 +97,15 @@ func (_m *Interface) GetAnnotationsOnPod(namespace string, name string) (map[str } // GetNamespaces provides a mock function with given fields: labelSelector -func (_m *Interface) GetNamespaces(labelSelector metav1.LabelSelector) (*corev1.NamespaceList, error) { +func (_m *Interface) GetNamespaces(labelSelector metav1.LabelSelector) (*apicorev1.NamespaceList, error) { ret := _m.Called(labelSelector) - var r0 *corev1.NamespaceList - if rf, ok := ret.Get(0).(func(metav1.LabelSelector) *corev1.NamespaceList); ok { + var r0 *apicorev1.NamespaceList + if rf, ok := ret.Get(0).(func(metav1.LabelSelector) *apicorev1.NamespaceList); ok { r0 = rf(labelSelector) } else { if ret.Get(0) != nil { - r0 = ret.Get(0).(*corev1.NamespaceList) + r0 = ret.Get(0).(*apicorev1.NamespaceList) } } @@ -80,15 +120,15 @@ func (_m *Interface) GetNamespaces(labelSelector metav1.LabelSelector) (*corev1. 
} // GetNode provides a mock function with given fields: name -func (_m *Interface) GetNode(name string) (*corev1.Node, error) { +func (_m *Interface) GetNode(name string) (*apicorev1.Node, error) { ret := _m.Called(name) - var r0 *corev1.Node - if rf, ok := ret.Get(0).(func(string) *corev1.Node); ok { + var r0 *apicorev1.Node + if rf, ok := ret.Get(0).(func(string) *apicorev1.Node); ok { r0 = rf(name) } else { if ret.Get(0) != nil { - r0 = ret.Get(0).(*corev1.Node) + r0 = ret.Get(0).(*apicorev1.Node) } } @@ -103,15 +143,15 @@ func (_m *Interface) GetNode(name string) (*corev1.Node, error) { } // GetNodes provides a mock function with given fields: -func (_m *Interface) GetNodes() (*corev1.NodeList, error) { +func (_m *Interface) GetNodes() (*apicorev1.NodeList, error) { ret := _m.Called() - var r0 *corev1.NodeList - if rf, ok := ret.Get(0).(func() *corev1.NodeList); ok { + var r0 *apicorev1.NodeList + if rf, ok := ret.Get(0).(func() *apicorev1.NodeList); ok { r0 = rf() } else { if ret.Get(0) != nil { - r0 = ret.Get(0).(*corev1.NodeList) + r0 = ret.Get(0).(*apicorev1.NodeList) } } @@ -126,15 +166,15 @@ func (_m *Interface) GetNodes() (*corev1.NodeList, error) { } // GetPod provides a mock function with given fields: namespace, name -func (_m *Interface) GetPod(namespace string, name string) (*corev1.Pod, error) { +func (_m *Interface) GetPod(namespace string, name string) (*apicorev1.Pod, error) { ret := _m.Called(namespace, name) - var r0 *corev1.Pod - if rf, ok := ret.Get(0).(func(string, string) *corev1.Pod); ok { + var r0 *apicorev1.Pod + if rf, ok := ret.Get(0).(func(string, string) *apicorev1.Pod); ok { r0 = rf(namespace, name) } else { if ret.Get(0) != nil { - r0 = ret.Get(0).(*corev1.Pod) + r0 = ret.Get(0).(*apicorev1.Pod) } } @@ -149,15 +189,15 @@ func (_m *Interface) GetPod(namespace string, name string) (*corev1.Pod, error) } // GetPods provides a mock function with given fields: namespace, labelSelector -func (_m *Interface) GetPods(namespace string, labelSelector metav1.LabelSelector) (*corev1.PodList, error) { +func (_m *Interface) GetPods(namespace string, labelSelector metav1.LabelSelector) (*apicorev1.PodList, error) { ret := _m.Called(namespace, labelSelector) - var r0 *corev1.PodList - if rf, ok := ret.Get(0).(func(string, metav1.LabelSelector) *corev1.PodList); ok { + var r0 *apicorev1.PodList + if rf, ok := ret.Get(0).(func(string, metav1.LabelSelector) *apicorev1.PodList); ok { r0 = rf(namespace, labelSelector) } else { if ret.Get(0) != nil { - r0 = ret.Get(0).(*corev1.PodList) + r0 = ret.Get(0).(*apicorev1.PodList) } } @@ -172,11 +212,11 @@ func (_m *Interface) GetPods(namespace string, labelSelector metav1.LabelSelecto } // PatchNode provides a mock function with given fields: old, new -func (_m *Interface) PatchNode(old *corev1.Node, new *corev1.Node) error { +func (_m *Interface) PatchNode(old *apicorev1.Node, new *apicorev1.Node) error { ret := _m.Called(old, new) var r0 error - if rf, ok := ret.Get(0).(func(*corev1.Node, *corev1.Node) error); ok { + if rf, ok := ret.Get(0).(func(*apicorev1.Node, *apicorev1.Node) error); ok { r0 = rf(old, new) } else { r0 = ret.Error(0) @@ -186,11 +226,11 @@ func (_m *Interface) PatchNode(old *corev1.Node, new *corev1.Node) error { } // RemoveTaintFromNode provides a mock function with given fields: nodeName, taint -func (_m *Interface) RemoveTaintFromNode(nodeName string, taint *corev1.Taint) error { +func (_m *Interface) RemoveTaintFromNode(nodeName string, taint *apicorev1.Taint) error { ret := _m.Called(nodeName, taint) var 
r0 error - if rf, ok := ret.Get(0).(func(string, *corev1.Taint) error); ok { + if rf, ok := ret.Get(0).(func(string, *apicorev1.Taint) error); ok { r0 = rf(nodeName, taint) } else { r0 = ret.Error(0) @@ -256,11 +296,11 @@ func (_m *Interface) SetAnnotationsOnService(namespace string, serviceName strin } // SetTaintOnNode provides a mock function with given fields: nodeName, taint -func (_m *Interface) SetTaintOnNode(nodeName string, taint *corev1.Taint) error { +func (_m *Interface) SetTaintOnNode(nodeName string, taint *apicorev1.Taint) error { ret := _m.Called(nodeName, taint) var r0 error - if rf, ok := ret.Get(0).(func(string, *corev1.Taint) error); ok { + if rf, ok := ret.Get(0).(func(string, *apicorev1.Taint) error); ok { r0 = rf(nodeName, taint) } else { r0 = ret.Error(0) @@ -269,12 +309,35 @@ func (_m *Interface) SetTaintOnNode(nodeName string, taint *corev1.Taint) error return r0 } +// UpdateCloudPrivateIPConfig provides a mock function with given fields: cloudPrivateIPConfig +func (_m *Interface) UpdateCloudPrivateIPConfig(cloudPrivateIPConfig *v1.CloudPrivateIPConfig) (*v1.CloudPrivateIPConfig, error) { + ret := _m.Called(cloudPrivateIPConfig) + + var r0 *v1.CloudPrivateIPConfig + if rf, ok := ret.Get(0).(func(*v1.CloudPrivateIPConfig) *v1.CloudPrivateIPConfig); ok { + r0 = rf(cloudPrivateIPConfig) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*v1.CloudPrivateIPConfig) + } + } + + var r1 error + if rf, ok := ret.Get(1).(func(*v1.CloudPrivateIPConfig) error); ok { + r1 = rf(cloudPrivateIPConfig) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + // UpdateNode provides a mock function with given fields: node -func (_m *Interface) UpdateNode(node *corev1.Node) error { +func (_m *Interface) UpdateNode(node *apicorev1.Node) error { ret := _m.Called(node) var r0 error - if rf, ok := ret.Get(0).(func(*corev1.Node) error); ok { + if rf, ok := ret.Get(0).(func(*apicorev1.Node) error); ok { r0 = rf(node) } else { r0 = ret.Error(0) @@ -298,11 +361,11 @@ func (_m *Interface) UpdateEgressServiceStatus(namespace string, name string, ho } // UpdateNodeStatus provides a mock function with given fields: node -func (_m *Interface) UpdateNodeStatus(node *corev1.Node) error { +func (_m *Interface) UpdateNodeStatus(node *apicorev1.Node) error { ret := _m.Called(node) var r0 error - if rf, ok := ret.Get(0).(func(*corev1.Node) error); ok { + if rf, ok := ret.Get(0).(func(*apicorev1.Node) error); ok { r0 = rf(node) } else { r0 = ret.Error(0) @@ -312,11 +375,11 @@ func (_m *Interface) UpdateNodeStatus(node *corev1.Node) error { } // UpdatePod provides a mock function with given fields: pod -func (_m *Interface) UpdatePod(pod *corev1.Pod) error { +func (_m *Interface) UpdatePod(pod *apicorev1.Pod) error { ret := _m.Called(pod) var r0 error - if rf, ok := ret.Get(0).(func(*corev1.Pod) error); ok { + if rf, ok := ret.Get(0).(func(*apicorev1.Pod) error); ok { r0 = rf(pod) } else { r0 = ret.Error(0) @@ -325,6 +388,20 @@ func (_m *Interface) UpdatePod(pod *corev1.Pod) error { return r0 } +// UpdateStatusAPBExternalRoute provides a mock function with given fields: route +func (_m *Interface) UpdateStatusAPBExternalRoute(route *adminpolicybasedroutev1.AdminPolicyBasedExternalRoute) error { + ret := _m.Called(route) + + var r0 error + if rf, ok := ret.Get(0).(func(*adminpolicybasedroutev1.AdminPolicyBasedExternalRoute) error); ok { + r0 = rf(route) + } else { + r0 = ret.Error(0) + } + + return r0 +} + type mockConstructorTestingTNewInterface interface { mock.TestingT Cleanup(func()) diff --git 
a/go-controller/pkg/libovsdbops/router.go b/go-controller/pkg/libovsdbops/router.go index d34852e4f7..9c8467e9ee 100644 --- a/go-controller/pkg/libovsdbops/router.go +++ b/go-controller/pkg/libovsdbops/router.go @@ -787,6 +787,24 @@ func DeleteBFDs(nbClient libovsdbclient.Client, bfds ...*nbdb.BFD) error { return m.Delete(opModels...) } +func LookupBFD(nbClient libovsdbclient.Client, bfd *nbdb.BFD) (*nbdb.BFD, error) { + found := []*nbdb.BFD{} + opModel := operationModel{ + Model: bfd, + ModelPredicate: func(item *nbdb.BFD) bool { return item.DstIP == bfd.DstIP && item.LogicalPort == bfd.LogicalPort }, + ExistingResult: &found, + ErrNotFound: true, + BulkOp: false, + } + + m := newModelClient(nbClient) + err := m.Lookup(opModel) + if err != nil { + return nil, err + } + return found[0], nil +} + // LB OPs // AddLoadBalancersToLogicalRouterOps adds the provided load balancers to the diff --git a/go-controller/pkg/network-controller-manager/network_controller_manager.go b/go-controller/pkg/network-controller-manager/network_controller_manager.go index c5f97cc555..e4ee0b217d 100644 --- a/go-controller/pkg/network-controller-manager/network_controller_manager.go +++ b/go-controller/pkg/network-controller-manager/network_controller_manager.go @@ -191,6 +191,7 @@ func NewNetworkControllerManager(ovnClient *util.OVNClientset, identity string, EgressFirewallClient: ovnClient.EgressFirewallClient, CloudNetworkClient: ovnClient.CloudNetworkClient, EgressServiceClient: ovnClient.EgressServiceClient, + APBRouteClient: ovnClient.AdminPolicyRouteClient, }, stopChan: make(chan struct{}), watchFactory: wf, diff --git a/go-controller/pkg/network-controller-manager/node_network_controller_manager.go b/go-controller/pkg/network-controller-manager/node_network_controller_manager.go index d59f4d9c05..059696b501 100644 --- a/go-controller/pkg/network-controller-manager/node_network_controller_manager.go +++ b/go-controller/pkg/network-controller-manager/node_network_controller_manager.go @@ -16,7 +16,6 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" "k8s.io/apimachinery/pkg/util/wait" - clientset "k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/record" "k8s.io/klog/v2" kexec "k8s.io/utils/exec" @@ -24,12 +23,12 @@ import ( // nodeNetworkControllerManager structure is the object manages all controllers for all networks for ovnkube-node type nodeNetworkControllerManager struct { - name string - client clientset.Interface - Kube kube.Interface - watchFactory factory.NodeWatchFactory - stopChan chan struct{} - recorder record.EventRecorder + name string + ovnNodeClient *util.OVNNodeClientset + Kube kube.Interface + watchFactory factory.NodeWatchFactory + stopChan chan struct{} + recorder record.EventRecorder defaultNodeNetworkController nad.BaseNetworkController @@ -55,19 +54,19 @@ func (ncm *nodeNetworkControllerManager) CleanupDeletedNetworks(allControllers [ // newCommonNetworkControllerInfo creates and returns the base node network controller info func (ncm *nodeNetworkControllerManager) newCommonNetworkControllerInfo() *node.CommonNodeNetworkControllerInfo { - return node.NewCommonNodeNetworkControllerInfo(ncm.client, ncm.watchFactory, ncm.recorder, ncm.name) + return node.NewCommonNodeNetworkControllerInfo(ncm.ovnNodeClient.KubeClient, ncm.ovnNodeClient.AdminPolicyRouteClient, ncm.watchFactory, ncm.recorder, ncm.name) } // NewNodeNetworkControllerManager creates a new OVN controller manager to manage all the controller for all networks func NewNodeNetworkControllerManager(ovnClient 
*util.OVNClientset, wf factory.NodeWatchFactory, name string, eventRecorder record.EventRecorder) (*nodeNetworkControllerManager, error) { ncm := &nodeNetworkControllerManager{ - name: name, - client: ovnClient.KubeClient, - Kube: &kube.Kube{KClient: ovnClient.KubeClient}, - watchFactory: wf, - stopChan: make(chan struct{}), - recorder: eventRecorder, + name: name, + ovnNodeClient: &util.OVNNodeClientset{KubeClient: ovnClient.KubeClient, AdminPolicyRouteClient: ovnClient.AdminPolicyRouteClient}, + Kube: &kube.Kube{KClient: ovnClient.KubeClient}, + watchFactory: wf, + stopChan: make(chan struct{}), + recorder: eventRecorder, } // need to configure OVS interfaces for Pods on secondary networks in the DPU mode diff --git a/go-controller/pkg/node/base_node_network_controller_dpu_test.go b/go-controller/pkg/node/base_node_network_controller_dpu_test.go index e20e57043e..006c77944e 100644 --- a/go-controller/pkg/node/base_node_network_controller_dpu_test.go +++ b/go-controller/pkg/node/base_node_network_controller_dpu_test.go @@ -10,6 +10,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni" + adminpolicybasedrouteclient "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake" factorymocks "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory/mocks" kubemocks "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube/mocks" ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" @@ -105,8 +106,9 @@ var _ = Describe("Node DPU tests", func() { Expect(err).NotTo(HaveOccurred()) kubeMock = kubemocks.Interface{} + apbExternalRouteClient := adminpolicybasedrouteclient.NewSimpleClientset() factoryMock = factorymocks.NodeWatchFactory{} - cnnci := newCommonNodeNetworkControllerInfo(nil, &kubeMock, &factoryMock, nil, "") + cnnci := newCommonNodeNetworkControllerInfo(nil, &kubeMock, apbExternalRouteClient, &factoryMock, nil, "") dnnc = newDefaultNodeNetworkController(cnnci, nil, nil) podNamespaceLister = v1mocks.PodNamespaceLister{} diff --git a/go-controller/pkg/node/default_node_network_controller.go b/go-controller/pkg/node/default_node_network_controller.go index 04bd6f1e69..29da5cb2bb 100644 --- a/go-controller/pkg/node/default_node_network_controller.go +++ b/go-controller/pkg/node/default_node_network_controller.go @@ -25,6 +25,7 @@ import ( honode "github.com/ovn-org/ovn-kubernetes/go-controller/hybrid-overlay/pkg/controller" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + adminpolicybasedrouteclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/informer" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" @@ -32,6 +33,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/controllers/upgrade" nodeipt "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/iptables" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/ovspinning" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/apbroute" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/healthcheck" retry "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/retry" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" @@ -41,11 +43,12 @@ import ( ) type CommonNodeNetworkControllerInfo struct { - 
client clientset.Interface - Kube kube.Interface - watchFactory factory.NodeWatchFactory - recorder record.EventRecorder - name string + client clientset.Interface + Kube kube.Interface + watchFactory factory.NodeWatchFactory + recorder record.EventRecorder + name string + apbExternalRouteClient adminpolicybasedrouteclientset.Interface } // BaseNodeNetworkController structure per-network fields and network specific configuration @@ -67,22 +70,23 @@ type BaseNodeNetworkController struct { wg *sync.WaitGroup } -func newCommonNodeNetworkControllerInfo(kubeClient clientset.Interface, kube kube.Interface, +func newCommonNodeNetworkControllerInfo(kubeClient clientset.Interface, kube kube.Interface, apbExternalRouteClient adminpolicybasedrouteclientset.Interface, wf factory.NodeWatchFactory, eventRecorder record.EventRecorder, name string) *CommonNodeNetworkControllerInfo { return &CommonNodeNetworkControllerInfo{ - client: kubeClient, - Kube: kube, - watchFactory: wf, - name: name, - recorder: eventRecorder, + client: kubeClient, + Kube: kube, + apbExternalRouteClient: apbExternalRouteClient, + watchFactory: wf, + name: name, + recorder: eventRecorder, } } // NewCommonNodeNetworkControllerInfo creates and returns the base node network controller info -func NewCommonNodeNetworkControllerInfo(kubeClient clientset.Interface, wf factory.NodeWatchFactory, +func NewCommonNodeNetworkControllerInfo(kubeClient clientset.Interface, apbExternalRouteClient adminpolicybasedrouteclientset.Interface, wf factory.NodeWatchFactory, eventRecorder record.EventRecorder, name string) *CommonNodeNetworkControllerInfo { - return newCommonNodeNetworkControllerInfo(kubeClient, &kube.Kube{KClient: kubeClient}, wf, eventRecorder, name) + return newCommonNodeNetworkControllerInfo(kubeClient, &kube.Kube{KClient: kubeClient}, apbExternalRouteClient, wf, eventRecorder, name) } // DefaultNodeNetworkController is the object holder for utilities meant for node management of default network @@ -99,10 +103,13 @@ type DefaultNodeNetworkController struct { retryNamespaces *retry.RetryFramework // retry framework for endpoint slices, used for the removal of stale conntrack entries for services retryEndpointSlices *retry.RetryFramework + + apbExternalRouteNodeController *apbroute.ExternalGatewayNodeController } func newDefaultNodeNetworkController(cnnci *CommonNodeNetworkControllerInfo, stopChan chan struct{}, wg *sync.WaitGroup) *DefaultNodeNetworkController { + return &DefaultNodeNetworkController{ BaseNodeNetworkController: BaseNodeNetworkController{ CommonNodeNetworkControllerInfo: *cnnci, @@ -116,13 +123,13 @@ func newDefaultNodeNetworkController(cnnci *CommonNodeNetworkControllerInfo, sto // NewDefaultNodeNetworkController creates a new network controller for node management of the default network func NewDefaultNodeNetworkController(cnnci *CommonNodeNetworkControllerInfo) (*DefaultNodeNetworkController, error) { + var err error stopChan := make(chan struct{}) wg := &sync.WaitGroup{} nc := newDefaultNodeNetworkController(cnnci, stopChan, wg) if len(config.Kubernetes.HealthzBindAddress) != 0 { klog.Infof("Enable node proxy healthz server on %s", config.Kubernetes.HealthzBindAddress) - var err error nc.healthzServer, err = newNodeProxyHealthzServer( nc.name, config.Kubernetes.HealthzBindAddress, nc.recorder, nc.watchFactory) if err != nil { @@ -130,7 +137,17 @@ func NewDefaultNodeNetworkController(cnnci *CommonNodeNetworkControllerInfo) (*D } } + nc.apbExternalRouteNodeController, err = apbroute.NewExternalNodeController( + 
cnnci.apbExternalRouteClient, + nc.watchFactory.PodCoreInformer(), + nc.watchFactory.NamespaceInformer(), + stopChan) + if err != nil { + return nil, err + } + nc.initRetryFrameworkForNode() + return nc, nil } @@ -917,6 +934,11 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error { c.Run(1) }() } + nc.wg.Add(1) + go func() { + defer nc.wg.Done() + nc.apbExternalRouteNodeController.Run(1) + }() nc.wg.Add(1) go func() { @@ -1049,13 +1071,17 @@ func (nc *DefaultNodeNetworkController) checkAndDeleteStaleConntrackEntries() { } func (nc *DefaultNodeNetworkController) syncConntrackForExternalGateways(newNs *kapi.Namespace) error { + gatewayIPs, err := nc.apbExternalRouteNodeController.GetAdminPolicyBasedExternalRouteIPsForTargetNamespace(newNs.Name) + if err != nil { + klog.Errorf("Unable to retrieve Admin Policy Based External Route objects:%v", err) + } // loop through all the IPs on the annotations; ARP for their MACs and form an allowlist - gatewayIPs := strings.Split(newNs.Annotations[util.ExternalGatewayPodIPsAnnotation], ",") - gatewayIPs = append(gatewayIPs, strings.Split(newNs.Annotations[util.RoutingExternalGWsAnnotation], ",")...) + gatewayIPs = gatewayIPs.Insert(strings.Split(newNs.Annotations[util.ExternalGatewayPodIPsAnnotation], ",")...) + gatewayIPs = gatewayIPs.Insert(strings.Split(newNs.Annotations[util.RoutingExternalGWsAnnotation], ",")...) var wg sync.WaitGroup wg.Add(len(gatewayIPs)) validMACs := sync.Map{} - for _, gwIP := range gatewayIPs { + for gwIP := range gatewayIPs { go func(gwIP string) { defer wg.Done() if len(gwIP) > 0 && !utilnet.IsIPv6String(gwIP) { diff --git a/go-controller/pkg/node/gateway_init_linux_test.go b/go-controller/pkg/node/gateway_init_linux_test.go index 6366208cd3..3e0ed67a76 100644 --- a/go-controller/pkg/node/gateway_init_linux_test.go +++ b/go-controller/pkg/node/gateway_init_linux_test.go @@ -23,6 +23,7 @@ import ( "k8s.io/client-go/kubernetes/fake" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + adminpolicybasedrouteclient "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" @@ -714,7 +715,8 @@ func shareGatewayInterfaceDPUHostTest(app *cli.App, testNS ns.NetNS, uplinkName, Items: []v1.Node{existingNode}, }) fakeClient := &util.OVNNodeClientset{ - KubeClient: kubeFakeClient, + KubeClient: kubeFakeClient, + AdminPolicyRouteClient: adminpolicybasedrouteclient.NewSimpleClientset(), } stop := make(chan struct{}) @@ -729,7 +731,7 @@ func shareGatewayInterfaceDPUHostTest(app *cli.App, testNS ns.NetNS, uplinkName, err = wf.Start() Expect(err).NotTo(HaveOccurred()) - cnnci := NewCommonNodeNetworkControllerInfo(nil, wf, nil, nodeName) + cnnci := NewCommonNodeNetworkControllerInfo(nil, fakeClient.AdminPolicyRouteClient, wf, nil, nodeName) nc := newDefaultNodeNetworkController(cnnci, stop, wg) // must run route manager manually which is usually started with nc.Start() wg.Add(1) diff --git a/go-controller/pkg/node/ovn_test.go b/go-controller/pkg/node/ovn_test.go index 0f82c1d7fd..a5ddb899fc 100644 --- a/go-controller/pkg/node/ovn_test.go +++ b/go-controller/pkg/node/ovn_test.go @@ -6,6 +6,7 @@ import ( . 
"github.com/onsi/gomega" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + adminpolicybasedrouteclient "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake" egressserviceapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1" egressservicefake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/fake" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" @@ -54,8 +55,9 @@ func (o *FakeOVNNode) start(ctx *cli.Context, objects ...runtime.Object) { Expect(err).NotTo(HaveOccurred()) o.fakeClient = &util.OVNNodeClientset{ - KubeClient: fake.NewSimpleClientset(v1Objects...), - EgressServiceClient: egressservicefake.NewSimpleClientset(egressServiceObjects...), + KubeClient: fake.NewSimpleClientset(v1Objects...), + EgressServiceClient: egressservicefake.NewSimpleClientset(egressServiceObjects...), + AdminPolicyRouteClient: adminpolicybasedrouteclient.NewSimpleClientset(), } o.init() // initializes the node } @@ -79,7 +81,7 @@ func (o *FakeOVNNode) init() { o.watcher, err = factory.NewNodeWatchFactory(o.fakeClient, fakeNodeName) Expect(err).NotTo(HaveOccurred()) - cnnci := NewCommonNodeNetworkControllerInfo(o.fakeClient.KubeClient, o.watcher, o.recorder, fakeNodeName) + cnnci := NewCommonNodeNetworkControllerInfo(o.fakeClient.KubeClient, o.fakeClient.AdminPolicyRouteClient, o.watcher, o.recorder, fakeNodeName) o.nc = newDefaultNodeNetworkController(cnnci, o.stopChan, o.wg) // watcher is started by nodeNetworkControllerManager, not by nodeNetworkcontroller, so start it here. o.watcher.Start() diff --git a/go-controller/pkg/ovn/controller/apbroute/apbroute_suite_test.go b/go-controller/pkg/ovn/controller/apbroute/apbroute_suite_test.go new file mode 100644 index 0000000000..1305fd2bad --- /dev/null +++ b/go-controller/pkg/ovn/controller/apbroute/apbroute_suite_test.go @@ -0,0 +1,13 @@ +package apbroute + +import ( + "testing" + + . "github.com/onsi/ginkgo" + . 
"github.com/onsi/gomega" +) + +func TestApbroute(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Admin Based Policy External Route Controller Suite") +} diff --git a/go-controller/pkg/ovn/controller/apbroute/external_controller.go b/go-controller/pkg/ovn/controller/apbroute/external_controller.go new file mode 100644 index 0000000000..ea6f2d23fb --- /dev/null +++ b/go-controller/pkg/ovn/controller/apbroute/external_controller.go @@ -0,0 +1,372 @@ +package apbroute + +import ( + "fmt" + "strings" + "sync" + + adminpolicybasedrouteapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + adminpolicybasedroutelisters "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/listers/adminpolicybasedroute/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/syncmap" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + ktypes "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" + corev1listers "k8s.io/client-go/listers/core/v1" + "k8s.io/klog/v2" +) + +type gatewayInfoList []*gatewayInfo + +func (g gatewayInfoList) String() string { + + s := strings.Builder{} + for _, item := range g { + s.WriteString(fmt.Sprintf("%s, ", item.gws)) + } + return s.String() +} + +func (g gatewayInfoList) HasIP(ip string) bool { + for _, i := range g { + if i.gws.Has(ip) { + return true + } + } + return false +} + +func (g gatewayInfoList) Insert(items ...*gatewayInfo) (gatewayInfoList, sets.Set[string]) { + ret := append(gatewayInfoList{}, g...) + duplicated := sets.New[string]() + for _, item := range items { + for _, ip := range item.gws.UnsortedList() { + if ret.HasIP(ip) { + duplicated = duplicated.Insert(ip) + continue + } + ret = append(ret, item) + } + } + return ret, duplicated +} +func (g gatewayInfoList) Delete(item *gatewayInfo) gatewayInfoList { + ret := gatewayInfoList{} + for _, i := range g { + if !i.gws.Equal(item.gws) { + ret, _ = ret.Insert(i) + } + } + return ret +} + +func (g gatewayInfoList) Len() int { + return len(g) +} + +func (g gatewayInfoList) Less(i, j int) bool { return lessGWsIP(g[i], g[j]) } +func (g gatewayInfoList) Swap(i, j int) { g[i], g[j] = g[j], g[i] } + +func lessGWsIP(l, r *gatewayInfo) bool { + + for lip := range l.gws { + for rip := range r.gws { + if lip > rip { + return false + } + } + } + return true +} + +type gatewayInfo struct { + gws sets.Set[string] + bfdEnabled bool +} +type namespaceInfo struct { + policies sets.Set[string] + staticGateways gatewayInfoList + dynamicGateways map[ktypes.NamespacedName]*gatewayInfo +} + +func newNamespaceInfo() *namespaceInfo { + return &namespaceInfo{ + policies: sets.New[string](), + dynamicGateways: make(map[ktypes.NamespacedName]*gatewayInfo), + staticGateways: gatewayInfoList{}, + } +} + +type routeInfo struct { + policy *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute + toBeDeleted bool +} + +type ExternalRouteInfo struct { + sync.Mutex + Deleted bool + PodName ktypes.NamespacedName + // PodExternalRoutes is a cache keeping the LR routes added to the GRs when + // external gateways are used. The first map key is the podIP (src-ip of the route), + // the second the GW IP (next hop), and the third the GR name + PodExternalRoutes map[string]map[string]string +} + +// This structure contains the processed information of a policy. 
+// This information is then used to update the network components (North Bound DB, conntrack) by applying the IPs here to each of the target namespaces defined in the from field. +type routePolicy struct { + // targetNamespacesSelector contains the namespace selector defined in the from field in the policy. + targetNamespacesSelector *metav1.LabelSelector + // staticGateways contains the processed list of IPs and BFD information defined in the staticHop slice in the policy. + staticGateways gatewayInfoList + // dynamicGateways contains the processed list of IPs and BFD information defined in the dynamicHop slice in the policy. + // the IP and BFD information of each pod gateway is stored in a map where the key is of type NamespacedName with the namespace and podName as values + // and the value is the gatewayInfo, which contains a set of IPs and the flag to determine if the BFD protocol is to be enabled for this IP + dynamicGateways map[ktypes.NamespacedName]*gatewayInfo +} + +type externalPolicyManager struct { + stopCh <-chan struct{} + // route policies + routeLister adminpolicybasedroutelisters.AdminPolicyBasedExternalRouteLister + // Pods + podLister corev1listers.PodLister + // Namespaces + namespaceLister corev1listers.NamespaceLister + // cache for set of policies impacting a given namespace + namespaceInfoSyncCache *syncmap.SyncMap[*namespaceInfo] + routePolicySyncCache *syncmap.SyncMap[*routeInfo] + // networkClient is an interface that exposes add and delete GW IPs. There are 2 structs that implement this contract: one to interface with the north bound DB and another one for the conntrack. + // the north bound is used by the master controller to add and delete the logical static routes, whilst the conntrack is used by the node controller to ensure that the ECMP entries are removed + // when a gateway IP is no longer an egress access point. + netClient networkClient + // flag used to determine if the repair() function has completed populating the policy route cache. + routePolicyCachePopulated bool + mutexRoutePolicyCachePopulated *sync.Mutex +} + +func newExternalPolicyManager( + stopCh <-chan struct{}, + podLister corev1listers.PodLister, + namespaceLister corev1listers.NamespaceLister, + routeLister adminpolicybasedroutelisters.AdminPolicyBasedExternalRouteLister, + netClient networkClient) *externalPolicyManager { + + m := externalPolicyManager{ + stopCh: stopCh, + routeLister: routeLister, + podLister: podLister, + namespaceLister: namespaceLister, + namespaceInfoSyncCache: syncmap.NewSyncMap[*namespaceInfo](), + routePolicySyncCache: syncmap.NewSyncMap[*routeInfo](), + netClient: netClient, + mutexRoutePolicyCachePopulated: &sync.Mutex{}, + } + + return &m +} + +func (m *externalPolicyManager) setRoutePolicyCacheAsPopulated() { + m.mutexRoutePolicyCachePopulated.Lock() + defer m.mutexRoutePolicyCachePopulated.Unlock() + m.routePolicyCachePopulated = true +} + +func (m *externalPolicyManager) isRoutePolicyCachePopulated() bool { + m.mutexRoutePolicyCachePopulated.Lock() + defer m.mutexRoutePolicyCachePopulated.Unlock() + return m.routePolicyCachePopulated +} + +// getRoutePolicyFromCache retrieves the cached value of the policy if it exists in the cache, as well as locking the key in case it exists. 
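// Illustrative sketch (not part of this change): the gatewayInfoList helpers
// defined earlier in this file merge gateway IPs coming from several policies
// while reporting which IPs were already present; the namespace aggregation
// logic later uses that duplicated set to warn about repeated gateway IPs.
// The function name below is hypothetical and only demonstrates the
// Insert/HasIP contract, assuming the unexported types from this file.
func exampleGatewayInfoListInsert() {
	static := gatewayInfoList{}

	// A first policy contributes 10.10.10.1; nothing is duplicated yet.
	static, dup := static.Insert(&gatewayInfo{gws: sets.New("10.10.10.1")})
	klog.Infof("duplicated after first insert: %v", sets.List(dup)) // []

	// A second policy repeats 10.10.10.1 and adds 10.10.10.2: the repeated IP
	// is returned in the duplicated set, and the list now answers HasIP for
	// both addresses.
	static, dup = static.Insert(&gatewayInfo{gws: sets.New("10.10.10.1", "10.10.10.2")})
	klog.Infof("duplicated after second insert: %v", sets.List(dup)) // [10.10.10.1]
	klog.Infof("has 10.10.10.2: %v", static.HasIP("10.10.10.2"))     // true
}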
+func (m *externalPolicyManager) getRoutePolicyFromCache(policyName string) (adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute, bool, bool) { + var ( + policy adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute + found, markedForDeletion bool + ) + _ = m.routePolicySyncCache.DoWithLock(policyName, func(policyName string) error { + ri, f := m.routePolicySyncCache.Load(policyName) + if !f { + return nil + } + found = f + policy = *ri.policy + markedForDeletion = ri.toBeDeleted + return nil + }) + return policy, found, markedForDeletion +} + +func (m *externalPolicyManager) storeRoutePolicyInCache(policyInfo *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) error { + return m.routePolicySyncCache.DoWithLock(policyInfo.Name, func(policyName string) error { + ri, found := m.routePolicySyncCache.Load(policyName) + if !found { + m.routePolicySyncCache.LoadOrStore(policyName, &routeInfo{policy: policyInfo}) + return nil + } + if ri.toBeDeleted { + return fmt.Errorf("attempting to store policy %s that is in the process of being deleted", policyInfo.Name) + } + ri.policy = policyInfo + return nil + }) +} + +func (m *externalPolicyManager) deleteRoutePolicyFromCache(policyName string) error { + return m.routePolicySyncCache.DoWithLock(policyName, func(policyName string) error { + ri, found := m.routePolicySyncCache.Load(policyName) + if found && !ri.toBeDeleted { + return fmt.Errorf("attempting to delete route policy %s from cache before it has been marked for deletion", policyName) + } + m.routePolicySyncCache.Delete(policyName) + return nil + }) +} + +// getAndMarkRoutePolicyForDeletionInCache flags a route policy for deletion and returns its cached value. This mark is used as a flag for other routines that attempt to retrieve the policy +// while processing pods or namespaces related to the given policy. 
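// Illustrative sketch (not part of this change): the route-policy cache
// helpers in this file are intended to be combined so that a policy flagged
// for deletion can no longer be overwritten by a concurrent store. A
// hypothetical teardown flow, using only methods defined in this file:
func exampleRoutePolicyTeardown(m *externalPolicyManager, policyName string) error {
	// Flag the policy first; from this point storeRoutePolicyInCache rejects
	// further updates for it and getRoutePolicyFromCache reports it as marked.
	if _, exists := m.getAndMarkRoutePolicyForDeletionInCache(policyName); !exists {
		return nil // nothing cached for this policy
	}

	// ... remove the gateway IPs derived from the policy here ...

	// Deleting the cache entry only succeeds once the mark has been set.
	return m.deleteRoutePolicyFromCache(policyName)
}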
+func (m *externalPolicyManager) getAndMarkRoutePolicyForDeletionInCache(policyName string) (adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute, bool) { + var ( + exists bool + routePolicy adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute + ) + _ = m.routePolicySyncCache.DoWithLock(policyName, func(policyName string) error { + ri, found := m.routePolicySyncCache.Load(policyName) + if !found { + return nil + } + ri.toBeDeleted = true + exists = true + routePolicy = *ri.policy + return nil + }) + return routePolicy, exists +} + +func (m *externalPolicyManager) getNamespaceInfoFromCache(namespaceName string) (*namespaceInfo, bool) { + m.namespaceInfoSyncCache.LockKey(namespaceName) + nsInfo, ok := m.namespaceInfoSyncCache.Load(namespaceName) + if !ok { + m.namespaceInfoSyncCache.UnlockKey(namespaceName) + return nil, false + } + return nsInfo, true +} + +func (m *externalPolicyManager) deleteNamespaceInfoInCache(namespaceName string) { + m.namespaceInfoSyncCache.Delete(namespaceName) +} + +func (m *externalPolicyManager) unlockNamespaceInfoCache(namespaceName string) { + m.namespaceInfoSyncCache.UnlockKey(namespaceName) +} + +func (m *externalPolicyManager) newNamespaceInfoInCache(namespaceName string) *namespaceInfo { + m.namespaceInfoSyncCache.LockKey(namespaceName) + nsInfo, _ := m.namespaceInfoSyncCache.LoadOrStore(namespaceName, newNamespaceInfo()) + return nsInfo +} + +func (m *externalPolicyManager) listNamespaceInfoCache() []string { + return m.namespaceInfoSyncCache.GetKeys() +} + +func (m *externalPolicyManager) getAllRoutePolicies() ([]*adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute, error) { + var ( + routePolicies []*adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute + err error + ) + // avoid hitting the informer if the route policies have already been cached during the execution of the repair() function. + if m.isRoutePolicyCachePopulated() { + keys := m.routePolicySyncCache.GetKeys() + for _, policyName := range keys { + rp, found, markedForDelete := m.getRoutePolicyFromCache(policyName) + // ignore route policies that have been marked for deletion. They will soon be parted from this cluster. + if !found || (found && markedForDelete) { + continue + } + routePolicies = append(routePolicies, &rp) + } + return routePolicies, nil + } + + routePolicies, err = m.routeLister.List(labels.Everything()) + if err != nil { + klog.Errorf("Failed to list Admin Policy Based External Routes:%v", err) + return nil, err + } + return routePolicies, nil +} + +// getDynamicGatewayIPsForTargetNamespace is called by the annotation logic to identify if a namespace is managed by an CR. +// Since the call can occur outside the lifecycle of the controller, it cannot rely on the namespace info cache object to have been populated. +// Therefore it has to go through all policies until it identifies one that targets the namespace and retrieve the gateway IPs. +// these IPs are used by the annotation logic to determine which ones to remove from the north bound DB (the ones not included in the list), +// and the ones to keep (the ones that match both the annotation and the CR). +// This logic ensures that both CR and annotations can coexist without duplicating gateway IPs. 
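// Illustrative sketch (not part of this change): getNamespaceInfoFromCache
// returns with the per-namespace key still locked on a hit, so every
// successful lookup has to be paired with unlockNamespaceInfoCache. A
// hypothetical reader that follows that contract:
func exampleReadNamespaceInfo(m *externalPolicyManager, namespaceName string) sets.Set[string] {
	nsInfo, found := m.getNamespaceInfoFromCache(namespaceName)
	if !found {
		// On a miss the key has already been unlocked by the helper.
		return nil
	}
	defer m.unlockNamespaceInfoCache(namespaceName)

	// Copy out what is needed while the key is held; nsInfo must not be
	// retained after the unlock.
	return sets.New(nsInfo.policies.UnsortedList()...)
}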
+func (m *externalPolicyManager) getDynamicGatewayIPsForTargetNamespace(namespaceName string) (sets.Set[string], error) { + policyGWIPs := sets.New[string]() + + routePolicies, err := m.getAllRoutePolicies() + if err != nil { + return nil, err + } + for _, routePolicy := range routePolicies { + p, err := m.processExternalRoutePolicy(routePolicy) + if err != nil { + klog.Errorf("Failed to process Admin Policy Based External Route %s: %v", routePolicy.Name, err) + return nil, err + } + targetNs, err := m.listNamespacesBySelector(p.targetNamespacesSelector) + if err != nil { + klog.Errorf("Failed to process namespace selector for Admin Policy Based External Route %s:%v", routePolicy.Name, err) + return nil, err + } + for _, ns := range targetNs { + if ns.Name == namespaceName { + // only collect the dynamic gateways + for _, gwInfo := range p.dynamicGateways { + policyGWIPs = policyGWIPs.Union(gwInfo.gws) + } + } + } + } + return policyGWIPs, nil +} + +// getStaticGatewayIPsForTargetNamespace is called by the annotation logic to identify if a namespace is managed by an CR. +// Since the call can occur outside the lifecycle of the controller, it cannot rely on the namespace info cache object to have been populated. +// Therefore it has to go through all policies until it identifies one that targets the namespace and retrieve the gateway IPs. +// these IPs are used by the annotation logic to determine which ones to remove from the north bound DB (the ones not included in the list), +// and the ones to keep (the ones that match both the annotation and the CR). +// This logic ensures that both CR and annotations can coexist without duplicating gateway IPs. +func (m *externalPolicyManager) getStaticGatewayIPsForTargetNamespace(namespaceName string) (sets.Set[string], error) { + policyGWIPs := sets.New[string]() + + routePolicies, err := m.routeLister.List(labels.Everything()) + if err != nil { + klog.Errorf("Failed to list Admin Policy Based External Routes:%v", err) + return nil, err + } + for _, routePolicy := range routePolicies { + p, err := m.processExternalRoutePolicy(routePolicy) + if err != nil { + klog.Errorf("Failed to process Admin Policy Based External Route %s: %v", routePolicy.Name, err) + return nil, err + } + targetNs, err := m.listNamespacesBySelector(p.targetNamespacesSelector) + if err != nil { + klog.Errorf("Failed to process namespace selector for Admin Policy Based External Route %s:%v", routePolicy.Name, err) + return nil, err + } + for _, ns := range targetNs { + if ns.Name == namespaceName { + // only collect the static gateways + for _, gwInfo := range p.staticGateways { + policyGWIPs.Insert(gwInfo.gws.UnsortedList()...) + } + } + } + } + return policyGWIPs, nil +} diff --git a/go-controller/pkg/ovn/controller/apbroute/external_controller_namespace.go b/go-controller/pkg/ovn/controller/apbroute/external_controller_namespace.go new file mode 100644 index 0000000000..f82b80a3f6 --- /dev/null +++ b/go-controller/pkg/ovn/controller/apbroute/external_controller_namespace.go @@ -0,0 +1,137 @@ +package apbroute + +import ( + "fmt" + + adminpolicybasedrouteapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + ktypes "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/klog/v2" +) + +// processAddNamespace takes in a namespace and applies the policies that are applicable to the namespace, previously stored in the cacheInfo object argument. 
+// The logic goes through all the policies and applies the gateway IPs derived from the static and dynamic hop to all the pods in the namespace. +// Lastly, it updates the cacheInfo to contain the static and dynamic gateway IPs generated from the previous action to keep track of the gateway IPs applied in the namespace. +func (m *externalPolicyManager) processAddNamespace(new *v1.Namespace, cacheInfo *namespaceInfo) error { + staticGateways, dynamicGateways, err := m.aggregateNamespaceInfo(cacheInfo.policies) + if err != nil { + return err + } + cacheInfo.staticGateways = staticGateways + cacheInfo.dynamicGateways = dynamicGateways + return nil +} + +// processUpdateNamespace takes in a namespace name, current policies applied to the namespace, policies that are now expected to be applied to the namespace and the cache info +// that contains all the current gateway IPs and policies for that namespace. It follows this logic: +// * Calculate the difference between current and expected policies and proceed to remove the gateway IPs from the policies that are no longer applicable to this namespace +// * Calculate the difference between the expected and current ones to determine the new policies to be applied and proceed to apply them. +// * Update the cache info with the new list of policies, as well as the static and dynamic gateway IPs derived from executing the previous logic. +func (m *externalPolicyManager) processUpdateNamespace(namespaceName string, currentPolicies, newPolicies sets.Set[string], cacheInfo *namespaceInfo) error { + + // some differences apply, let's figure out if previous policies have been removed first + policiesNotValid := currentPolicies.Difference(newPolicies) + // iterate through the policies that no longer apply to this namespace + for policyName := range policiesNotValid { + err := m.removePolicyFromNamespaceWithName(namespaceName, policyName, cacheInfo) + if err != nil { + return err + } + } + + // policies that now apply to this namespace + newPoliciesDiff := newPolicies.Difference(currentPolicies) + for policyName := range newPoliciesDiff { + policy, found, markedForDeletion := m.getRoutePolicyFromCache(policyName) + if !found { + return fmt.Errorf("failed to find external route policy %s in cache", policyName) + } + if markedForDeletion { + klog.Infof("Skipping route policy %s as it has been marked for deletion", policyName) + continue + } + err := m.applyPolicyToNamespace(namespaceName, &policy, cacheInfo) + if err != nil { + return err + } + } + // at least one policy apply, let's update the cache + cacheInfo.policies = newPolicies + return nil + +} + +func (m *externalPolicyManager) applyPolicyToNamespace(namespaceName string, policy *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute, cacheInfo *namespaceInfo) error { + + processedPolicy, err := m.processExternalRoutePolicy(policy) + if err != nil { + return err + } + err = m.applyProcessedPolicyToNamespace(namespaceName, policy.Name, processedPolicy, cacheInfo) + if err != nil { + return err + } + return nil +} + +func (m *externalPolicyManager) removePolicyFromNamespaceWithName(targetNamespace, policyName string, cacheInfo *namespaceInfo) error { + policy, err := m.routeLister.Get(policyName) + if err != nil { + return err + } + return m.removePolicyFromNamespace(targetNamespace, policy, cacheInfo) +} +func (m *externalPolicyManager) removePolicyFromNamespace(targetNamespace string, policy *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute, cacheInfo *namespaceInfo) error { + + 
processedPolicy, err := m.processExternalRoutePolicy(policy) + if err != nil { + return err + } + err = m.deletePolicyInNamespace(targetNamespace, policy.Name, processedPolicy, cacheInfo) + if err != nil { + return err + } + cacheInfo.policies.Delete(policy.Name) + return nil +} + +func (m *externalPolicyManager) listNamespacesBySelector(selector *metav1.LabelSelector) ([]*v1.Namespace, error) { + s, err := metav1.LabelSelectorAsSelector(selector) + if err != nil { + return nil, err + } + ns, err := m.namespaceLister.List(s) + if err != nil { + return nil, err + } + return ns, nil + +} + +func (m *externalPolicyManager) aggregateNamespaceInfo(policies sets.Set[string]) (gatewayInfoList, map[ktypes.NamespacedName]*gatewayInfo, error) { + + static := gatewayInfoList{} + dynamic := make(map[ktypes.NamespacedName]*gatewayInfo) + for policyName := range policies { + externalPolicy, err := m.routeLister.Get(policyName) + if err != nil { + klog.Warningf("Unable to find route policy %s:%+v", policyName, err) + continue + } + processedPolicy, err := m.processExternalRoutePolicy(externalPolicy) + if err != nil { + return nil, nil, err + } + var duplicated sets.Set[string] + static, duplicated = static.Insert(processedPolicy.staticGateways...) + if duplicated.Len() > 0 { + klog.Warningf("Found duplicated gateway IP(s) %+s in policy(s) %+s", sets.List(duplicated), sets.List(policies)) + } + for podName, gatewayInfo := range processedPolicy.dynamicGateways { + dynamic[podName] = gatewayInfo + } + } + return static, dynamic, nil +} diff --git a/go-controller/pkg/ovn/controller/apbroute/external_controller_namespace_test.go b/go-controller/pkg/ovn/controller/apbroute/external_controller_namespace_test.go new file mode 100644 index 0000000000..0d8e124185 --- /dev/null +++ b/go-controller/pkg/ovn/controller/apbroute/external_controller_namespace_test.go @@ -0,0 +1,358 @@ +package apbroute + +import ( + "context" + + . "github.com/onsi/ginkgo" + . 
"github.com/onsi/gomega" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + ktypes "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" + + corev1 "k8s.io/api/core/v1" + "k8s.io/client-go/kubernetes/fake" + + adminpolicybasedrouteapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + adminpolicybasedrouteclient "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" + libovsdbtest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/libovsdb" + "k8s.io/apimachinery/pkg/runtime" +) + +const ( + annotatedPodIP = "192.168.2.1" + dynamicHopHostNetPodIP = "192.168.1.1" + staticHopGWIP = "10.10.10.1" +) + +func newPolicy(policyName string, fromNSSelector *v1.LabelSelector, staticHopsGWIPs sets.Set[string], dynamicHopsNSSelector *v1.LabelSelector, dynamicHopsPodSelector *v1.LabelSelector, bfdEnabled bool) *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute { + p := adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute{ + ObjectMeta: v1.ObjectMeta{Name: policyName}, + Spec: adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteSpec{ + From: adminpolicybasedrouteapi.ExternalNetworkSource{ + NamespaceSelector: *fromNSSelector, + }, + NextHops: adminpolicybasedrouteapi.ExternalNextHops{}, + }, + } + + if staticHopsGWIPs.Len() > 0 { + p.Spec.NextHops.StaticHops = []*adminpolicybasedrouteapi.StaticHop{} + for ip := range staticHopsGWIPs { + p.Spec.NextHops.StaticHops = append(p.Spec.NextHops.StaticHops, &adminpolicybasedrouteapi.StaticHop{IP: ip, BFDEnabled: bfdEnabled}) + } + } + if dynamicHopsNSSelector != nil && dynamicHopsPodSelector != nil { + p.Spec.NextHops.DynamicHops = []*adminpolicybasedrouteapi.DynamicHop{ + {NamespaceSelector: dynamicHopsNSSelector, + PodSelector: *dynamicHopsPodSelector, + BFDEnabled: bfdEnabled}, + } + } + return &p +} + +func deletePolicy(policyName string, fakeRouteClient *adminpolicybasedrouteclient.Clientset) { + err = fakeRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Delete(context.TODO(), policyName, v1.DeleteOptions{}) + Expect(err).NotTo(HaveOccurred()) +} + +func deleteNamespace(namespaceName string, fakeClient *fake.Clientset) { + + err = fakeClient.CoreV1().Namespaces().Delete(context.Background(), namespaceName, v1.DeleteOptions{}) + Expect(err).NotTo(HaveOccurred()) +} + +func updateNamespaceLabel(namespaceName string, labels map[string]string, fakeClient *fake.Clientset) { + ns, err := fakeClient.CoreV1().Namespaces().Get(context.TODO(), namespaceName, v1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + incrementResourceVersion(ns) + ns.Labels = labels + _, err = fakeClient.CoreV1().Namespaces().Update(context.Background(), ns, v1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) +} + +func getNamespaceInfo(namespaceName string) *namespaceInfo { + f, found := mgr.getNamespaceInfoFromCache(namespaceName) + if found { + cp := &namespaceInfo{} + deepCopyNamespaceInfo(f, cp) + mgr.unlockNamespaceInfoCache(namespaceName) + return cp + } + return f +} +func listNamespaceInfo() []string { + return mgr.namespaceInfoSyncCache.GetKeys() +} + +func deepCopyNamespaceInfo(source, destination *namespaceInfo) { + destination.policies = sets.New(source.policies.UnsortedList()...) 
+ destination.staticGateways, _ = gatewayInfoList.Insert(source.staticGateways) + destination.dynamicGateways = make(map[ktypes.NamespacedName]*gatewayInfo) + for key, value := range source.dynamicGateways { + destination.dynamicGateways[key] = value + } +} + +var _ = Describe("OVN External Gateway namespace", func() { + + var ( + dynamicPolicy = newPolicy( + "dynamic", + &v1.LabelSelector{MatchLabels: map[string]string{"name": "test"}}, + nil, + &v1.LabelSelector{MatchLabels: map[string]string{"name": "default"}}, + &v1.LabelSelector{MatchLabels: map[string]string{"name": "pod"}}, + false, + ) + + staticPolicy = newPolicy( + "static", + &v1.LabelSelector{MatchLabels: map[string]string{"name": "test"}}, + sets.New(staticHopGWIP), + nil, + nil, + false, + ) + + annotatedPodGW = &corev1.Pod{ + ObjectMeta: v1.ObjectMeta{Name: "annotatedPod", Namespace: "default", + Labels: map[string]string{"name": "annotatedPod"}, + Annotations: map[string]string{"k8s.ovn.org/routing-namespaces": "test", "k8s.ovn.org/routing-network": ""}, + }, + Spec: corev1.PodSpec{HostNetwork: true}, + Status: corev1.PodStatus{PodIPs: []corev1.PodIP{{IP: annotatedPodIP}}, Phase: corev1.PodRunning}, + } + + podGW = &corev1.Pod{ + ObjectMeta: v1.ObjectMeta{Name: "pod", Namespace: "default", + Labels: map[string]string{"name": "pod"}}, + Spec: corev1.PodSpec{HostNetwork: true}, + Status: corev1.PodStatus{PodIPs: []corev1.PodIP{{IP: dynamicHopHostNetPodIP}}, Phase: corev1.PodRunning}, + } + namespaceDefault = &corev1.Namespace{ + ObjectMeta: v1.ObjectMeta{Name: "default", + Labels: map[string]string{"name": "default"}}} + namespaceTest = &corev1.Namespace{ + ObjectMeta: v1.ObjectMeta{Name: "test", + Labels: map[string]string{"name": "test"}}, + } + namespaceTest2 = &corev1.Namespace{ + ObjectMeta: v1.ObjectMeta{Name: "test2", + Labels: map[string]string{"name": "test2"}}, + } + ) + AfterEach(func() { + nbsbCleanup.Cleanup() + }) + + BeforeEach(func() { + initialDB = libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + Name: "node1", + }, + }, + } + nbClient, _, nbsbCleanup, err = libovsdbtest.NewNBSBTestHarness(initialDB) + Expect(err).NotTo(HaveOccurred()) + stopChan = make(chan struct{}) + + }) + + var _ = Context("When no pod or namespace routing network annotations coexist with the policies", func() { + + var _ = Context("When creating new namespaces", func() { + + It("registers the new namespace with no matching policies", func() { + initController([]runtime.Object{namespaceTest2}, []runtime.Object{dynamicPolicy}) + + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1)) + Eventually(func() adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteSpec { + p, found := externalController.mgr.routePolicySyncCache.Load(dynamicPolicy.Name) + if !found { + return adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteSpec{} + } + return p.policy.Spec + }, 5).Should(Equal(dynamicPolicy.Spec)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(0)) + }) + + It("registers the new namespace with one matching policy containing one static gateway", func() { + initController([]runtime.Object{namespaceTest}, []runtime.Object{staticPolicy}) + + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1)) + Eventually(func() adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteSpec { + p, found := externalController.mgr.routePolicySyncCache.Load(staticPolicy.Name) + if !found { + return 
adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteSpec{} + } + return p.policy.Spec + }, 5).Should(Equal(staticPolicy.Spec)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(staticPolicy.Name), + staticGateways: gatewayInfoList{{gws: sets.New(staticHopGWIP)}}, + dynamicGateways: make(map[ktypes.NamespacedName]*gatewayInfo)})) + }) + It("registers a new namespace with one policy that includes a dynamic GW", func() { + initController([]runtime.Object{namespaceTest, namespaceDefault, podGW}, []runtime.Object{dynamicPolicy}) + + By("validating that the namespace cache contains the test namespace and that it reflect the applicable policy") + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[ktypes.NamespacedName]*gatewayInfo{{Namespace: podGW.Namespace, Name: podGW.Name}: {gws: sets.New(dynamicHopHostNetPodIP)}}})) + }) + + It("registers a new namespace with one policy with dynamic GWs and the IP of an annotated pod", func() { + + initController([]runtime.Object{namespaceTest, namespaceDefault, podGW, annotatedPodGW}, []runtime.Object{dynamicPolicy}) + + By("validating that the namespace cache contains the test namespace and that it reflect the applicable policy") + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[ktypes.NamespacedName]*gatewayInfo{{Namespace: podGW.Namespace, Name: podGW.Name}: {gws: sets.New(dynamicHopHostNetPodIP)}}})) + }) + + It("registers a new namespace with one policy and validates that the deleted field is set to false", func() { + initController([]runtime.Object{namespaceTest, namespaceDefault, podGW, annotatedPodGW}, []runtime.Object{dynamicPolicy}) + + deleteNamespace(namespaceTest.Name, fakeClient) + By("validating that the namespace cache no longer contains the test namespace") + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(0)) + + _, err = fakeClient.CoreV1().Namespaces().Create(context.TODO(), namespaceTest, v1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + By("validating that the namespace cache is contained in the namespace info cache and it reflects the correct policy") + + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[ktypes.NamespacedName]*gatewayInfo{{Namespace: podGW.Namespace, Name: podGW.Name}: {gws: sets.New(dynamicHopHostNetPodIP)}}})) + }) + }) + }) + + var _ = Context("When deleting a namespace", func() { + + It("validates that the namespace cache is empty and marked as deleted when the namespace was a recipient for policies", func() { + initController([]runtime.Object{namespaceTest}, []runtime.Object{staticPolicy}) + + 
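			// Editor's note: the namespace info cache is populated asynchronously by
			// the controller, so immediately after startup it is still empty and the
			// assertions below poll with Eventually until the policy has been applied.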
Expect(externalController.mgr.namespaceInfoSyncCache.GetKeys()).To(HaveLen(0)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + Equal( + &namespaceInfo{ + policies: sets.New(staticPolicy.Name), + staticGateways: gatewayInfoList{{gws: sets.New(staticHopGWIP)}}, + dynamicGateways: make(map[ktypes.NamespacedName]*gatewayInfo)})) + + deleteNamespace(namespaceTest.Name, fakeClient) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(0)) + + }) + It("validates that the namespace cache is empty when the namespace that is recipient for any policy is deleted", func() { + initController([]runtime.Object{namespaceDefault}, []runtime.Object{staticPolicy}) + + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(0)) + deleteNamespace(namespaceDefault.Name, fakeClient) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(0)) + }) + + }) + + var _ = Context("When updating an existing namespace", func() { + + var ( + dynamicPolicyTest2 = newPolicy( + "dynamicPolicyTest2", + &v1.LabelSelector{MatchLabels: map[string]string{"key": "test"}}, + nil, + &v1.LabelSelector{MatchLabels: map[string]string{"name": "default"}}, + &v1.LabelSelector{MatchLabels: map[string]string{"name": "pod"}}, + false, + ) + ) + It("validates that a namespace is targeted by an existing policy after its labels are updated to match the policy's label selector", func() { + initController([]runtime.Object{namespaceDefault, namespaceTest2}, []runtime.Object{staticPolicy}) + + Eventually(func() []string { return listNamespaceInfo() }, 15).Should(HaveLen(0)) + updateNamespaceLabel(namespaceTest2.Name, staticPolicy.Spec.From.NamespaceSelector.MatchLabels, fakeClient) + Eventually(func() []string { return listNamespaceInfo() }, 15).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest2.Name) }, 15).Should( + Equal( + &namespaceInfo{ + policies: sets.New(staticPolicy.Name), + staticGateways: gatewayInfoList{{gws: sets.New(staticHopGWIP)}}, + dynamicGateways: make(map[ktypes.NamespacedName]*gatewayInfo)})) + }) + It("validates that a namespace is no longer targeted by an existing policy when its labels are updated so that they don't match the policy's label selector", func() { + initController([]runtime.Object{namespaceDefault, namespaceTest}, []runtime.Object{staticPolicy}) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + Equal( + &namespaceInfo{ + policies: sets.New(staticPolicy.Name), + staticGateways: gatewayInfoList{{gws: sets.New(staticHopGWIP)}}, + dynamicGateways: make(map[ktypes.NamespacedName]*gatewayInfo)})) + updateNamespaceLabel(namespaceTest.Name, dynamicPolicyTest2.Spec.From.NamespaceSelector.MatchLabels, fakeClient) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(0)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(BeNil()) + }) + + It("validates that a namespace changes its policies when its labels are changed to match a different policy, resulting in the later on being the only policy applied to the namespace", func() { + initController([]runtime.Object{namespaceDefault, namespaceTest, podGW}, []runtime.Object{staticPolicy, dynamicPolicyTest2}) + Eventually(func() []string { 
return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + Equal( + &namespaceInfo{ + policies: sets.New(staticPolicy.Name), + staticGateways: gatewayInfoList{{gws: sets.New(staticHopGWIP)}}, + dynamicGateways: make(map[ktypes.NamespacedName]*gatewayInfo)})) + updateNamespaceLabel(namespaceTest.Name, dynamicPolicyTest2.Spec.From.NamespaceSelector.MatchLabels, fakeClient) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(dynamicPolicyTest2.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[ktypes.NamespacedName]*gatewayInfo{{Namespace: podGW.Namespace, Name: podGW.Name}: {gws: sets.New(dynamicHopHostNetPodIP)}}})) + + }) + + It("validates that a namespace is now targeted by a second policy once its labels are updated to match the first and second policy", func() { + initController([]runtime.Object{namespaceDefault, namespaceTest, podGW}, []runtime.Object{staticPolicy, dynamicPolicyTest2}) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + Equal( + &namespaceInfo{ + policies: sets.New(staticPolicy.Name), + staticGateways: gatewayInfoList{{gws: sets.New(staticHopGWIP)}}, + dynamicGateways: make(map[ktypes.NamespacedName]*gatewayInfo)})) + aggregatedLabels := map[string]string{"name": "test", "key": "test"} + updateNamespaceLabel(namespaceTest.Name, aggregatedLabels, fakeClient) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(staticPolicy.Name, dynamicPolicyTest2.Name), + staticGateways: gatewayInfoList{{gws: sets.New(staticHopGWIP)}}, + dynamicGateways: map[ktypes.NamespacedName]*gatewayInfo{{Namespace: podGW.Namespace, Name: podGW.Name}: {gws: sets.New(dynamicHopHostNetPodIP)}}})) + }) + }) + +}) diff --git a/go-controller/pkg/ovn/controller/apbroute/external_controller_pod.go b/go-controller/pkg/ovn/controller/apbroute/external_controller_pod.go new file mode 100644 index 0000000000..60f21181f0 --- /dev/null +++ b/go-controller/pkg/ovn/controller/apbroute/external_controller_pod.go @@ -0,0 +1,436 @@ +package apbroute + +import ( + "encoding/json" + "fmt" + "net" + "strings" + + nettypes "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + adminpolicybasedrouteapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + ktypes "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/klog/v2" + utilnet "k8s.io/utils/net" +) + +// processAddPod covers 2 scenarios: +// 1) The pod is an external gateway, in which case it needs to propagate its IP to a set of pods in the cluster. +// Which namespaces to update is determined by matching the pod's namespace and label selector against +// all the existing Admin Policy Based External Route CRs.
It's a reverse lookup: + // + // pod GW -> dynamic hop -> APB External Route CR -> target namespaces (label selector in the CR's `From` field) -> pods in namespace + // + // 2) The pod belongs to a namespace impacted by at least one APB External Route CR, in which case its logical routes need to be + // updated to reflect the external routes. + // + // A pod can only be either an external gateway or a consumer of an external route policy. + func (m *externalPolicyManager) processAddPod(newPod *v1.Pod) error { + + // the pod can either be a gateway pod or a standard pod that requires no processing from the external controller. + // to determine either way, find out which matching dynamic hops include this pod. If none apply, then this is + // a standard pod and all that is needed is to update its logical routes to include all the external gateways, if they exist. + podPolicies, err := m.findMatchingDynamicPolicies(newPod) + if err != nil { + return err + } + if len(podPolicies) > 0 { + // this is a gateway pod + klog.Infof("Adding pod gateway %s/%s for policy %+v", newPod.Namespace, newPod.Name, podPolicies) + return m.applyPodGWPolicies(newPod, podPolicies) + } + cacheInfo, found := m.getNamespaceInfoFromCache(newPod.Namespace) + if !found || cacheInfo.policies.Len() == 0 { + // this is a standard pod and there are no external gateway policies applicable to the pod's namespace. Nothing to do + if !found { + return nil + } + m.unlockNamespaceInfoCache(newPod.Namespace) + return nil + } + defer m.unlockNamespaceInfoCache(newPod.Namespace) + // there are external gateway policies applicable to the pod's namespace. + klog.Infof("Applying policies to new pod %s/%s %+v", newPod.Namespace, newPod.Name, cacheInfo.policies) + return m.applyGatewayInfoToPod(newPod, cacheInfo.staticGateways, cacheInfo.dynamicGateways) +} + +func (m *externalPolicyManager) applyPodGWPolicies(pod *v1.Pod, externalRoutePolicies []*adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) error { + for _, erp := range externalRoutePolicies { + err := m.applyPodGWPolicy(pod, erp) + if err != nil { + return err + } + } + return nil +} + +func (m *externalPolicyManager) applyPodGWPolicy(pod *v1.Pod, externalRoutePolicy *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) error { + klog.Infof("Processing policy %s for pod %s/%s", externalRoutePolicy.Name, pod.Namespace, pod.Name) + routePolicy, err := m.getRoutePolicyForPodGateway(pod, externalRoutePolicy) + if err != nil { + return err + } + // update all namespaces targeted by this pod's policy to include the new pod IP as their external GW + err = m.applyProcessedPolicy(externalRoutePolicy.Name, routePolicy) + if err != nil { + return err + } + gwInfoMap, err := m.aggregateDynamicRouteGatewayInformation(pod, routePolicy) + if err != nil { + return err + } + key := ktypes.NamespacedName{Namespace: pod.Namespace, Name: pod.Name} + // update the namespace information for each targeted namespace to reflect the gateway IPs that handle external traffic + for ns, gwInfo := range gwInfoMap { + cacheInfo, found := m.getNamespaceInfoFromCache(ns) + if !found { + klog.Warningf("Attempting to update the dynamic gateway information for pod %s in a namespace that does not exist %s", pod.Name, ns) + continue + } + // update the gwInfo in the namespace cache + cacheInfo.dynamicGateways[key] = gwInfo + m.unlockNamespaceInfoCache(ns) + } + return nil +} + +func (m *externalPolicyManager) removePodGatewayFromNamespace(nsName string, podNamespacedName 
ktypes.NamespacedName) error { + // retrieve the gateway information from the impacted namespace's cache + cacheInfo, found := m.getNamespaceInfoFromCache(nsName) + if !found { + klog.Warningf("Attempting to remove pod gateway %s/%s from a namespace that does not exist %s", podNamespacedName.Namespace, podNamespacedName.Name, nsName) + return nil + } + defer m.unlockNamespaceInfoCache(nsName) + + gateways, found := cacheInfo.dynamicGateways[podNamespacedName] + if !found { + klog.Warningf("Pod %s/%s not found in dynamic cacheInfo for namespace %s", podNamespacedName.Namespace, podNamespacedName.Name, nsName) + return nil + } + annotatedGWIPs, err := m.calculateAnnotatedNamespaceGatewayIPsForNamespace(nsName) + if err != nil { + return err + } + // it is safe to pass the current policies and not expect the pod IP in the coexisting list of IPs, since the pod no longer matches the dynamic hop selectors in any of the policies + coexistingIPs, err := m.retrieveDynamicGatewayIPsForPolicies(cacheInfo.policies) + if err != nil { + return err + } + coexistingIPs = coexistingIPs.Union(annotatedGWIPs) + // Filter out the IPs that are not in coexisting. Those IPs are to be deleted. + invalidGWIPs := gateways.gws.Difference(coexistingIPs) + // The IPs to keep are the coexisting IPs minus those scheduled for deletion. + ipsToKeep := coexistingIPs.Difference(invalidGWIPs) + klog.Infof("Coexisting %s, invalid %s, ipsToKeep %s", strings.Join(sets.List(coexistingIPs), ","), strings.Join(sets.List(invalidGWIPs), ","), strings.Join(sets.List(ipsToKeep), ",")) + err = m.netClient.deleteGatewayIPs(nsName, invalidGWIPs, ipsToKeep) + if err != nil { + return err + } + gateways.gws.Delete(invalidGWIPs.UnsortedList()...) + if gateways.gws.Len() == 0 { + // remove pod from namespace cache + delete(cacheInfo.dynamicGateways, podNamespacedName) + } + return nil +} + +func (m *externalPolicyManager) addPodGatewayToNamespace(podNamespacedName ktypes.NamespacedName, namespaceName string, processedPolicies []*routePolicy) error { + // the pod's gatewayInfo is unique to a namespace, as the networkName field can differ depending on how the policy defines that field, + // so we retrieve the correct one for the given target namespace from the pre-processed policies. The lookup uses + // the target namespace and the key (pod_namespace,pod_name) as keys. + gatewayInfo, err := m.findGatewayInfoForPodInTargetNamespace(podNamespacedName, namespaceName, processedPolicies) + if err != nil { + return err + } + // use the pod's gatewayInfo to update the logical routes for all the pods in the target namespace + err = m.addGWRoutesForNamespace(namespaceName, gatewayInfoList{gatewayInfo}) + if err != nil { + return err + } + cacheInfo, found := m.getNamespaceInfoFromCache(namespaceName) + defer m.unlockNamespaceInfoCache(namespaceName) + if !found { + cacheInfo = m.newNamespaceInfoInCache(namespaceName) + } + // add pod gateway information to the namespace cache + cacheInfo.dynamicGateways[podNamespacedName] = gatewayInfo + return nil +} + +// processUpdatePod takes in an updated gateway pod and the list of old namespaces where the pod was used as egress gateway and proceeds as follows: +// - Finds the matching policies that apply to the pod based on the dynamic hop pod and namespace selectors. If the labels in the pod have not changed, the policies will match the existing ones. 
+
// - Based on the policies that use the pod IP as gateway, determine the namespaces where the pod IP will be used as egress gateway. If the namespaces match, return without error +// - Remove the pod IP as egress gateway from the namespaces that are no longer impacted by the pod. This is determined by calculating the difference between the old namespaces and the new ones based on the policies +// applicable to the updated pod. +// - Add the pod IP as egress gateway to the namespaces that are now being impacted by the changes in the pod. +func (m *externalPolicyManager) processUpdatePod(updatedPod *v1.Pod, oldTargetNs sets.Set[string]) error { + + // find the policies that apply to this new pod. Unless there are changes to the labels, they should be identical. + newPodPolicies, err := m.findMatchingDynamicPolicies(updatedPod) + if err != nil { + return err + } + key := ktypes.NamespacedName{Namespace: updatedPod.Namespace, Name: updatedPod.Name} + // aggregate the expected target namespaces based on the new pod's labels and current policies + // if the labels have not changed, the new targeted namespaces and the old ones should be identical + newTargetNs, err := m.aggregateTargetNamespacesByPolicies(key, newPodPolicies) + if err != nil { + return err + } + if oldTargetNs.Equal(newTargetNs) { + // targeting the same namespaces. Nothing to do + return nil + } + // the pod's target namespaces have changed: delete its reference from the namespaces that no longer apply + // and add it to the new ones, if necessary + nsToRemove := oldTargetNs.Difference(newTargetNs) + nsToAdd := newTargetNs.Difference(oldTargetNs) + klog.Infof("Removing pod gateway %s/%s from namespace(s): %s", updatedPod.Namespace, updatedPod.Name, strings.Join(sets.List(nsToRemove), ",")) + klog.Infof("Adding pod gateway %s/%s to namespace(s): %s", updatedPod.Namespace, updatedPod.Name, strings.Join(sets.List(nsToAdd), ",")) + // retrieve the gateway information for the pod + for ns := range nsToRemove { + err = m.removePodGatewayFromNamespace(ns, ktypes.NamespacedName{Namespace: updatedPod.Namespace, Name: updatedPod.Name}) + if err != nil { + return err + } + } + + // pre-process the policies so we can apply them. This step extracts the contents of the policies from the CRs + // into an internal structure that contains the static and dynamic hop information. 
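+	// For illustration, each processed policy is reduced to a routePolicy value
+	// roughly of this shape (field names as used elsewhere in this file):
+	//
+	//	routePolicy{
+	//		targetNamespacesSelector: ..., // the CR's From namespace selector
+	//		staticGateways:           gatewayInfoList{...},
+	//		dynamicGateways:          map[ktypes.NamespacedName]*gatewayInfo{...},
+	//	}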
+
 pp, err := m.processExternalRoutePolicies(newPodPolicies) + if err != nil { + return err + } + + for ns := range nsToAdd { + err = m.addPodGatewayToNamespace(ktypes.NamespacedName{Namespace: updatedPod.Namespace, Name: updatedPod.Name}, ns, pp) + if err != nil { + return err + } + } + + return nil +} + +func (m *externalPolicyManager) aggregateTargetNamespacesByPolicies(podName ktypes.NamespacedName, externalRoutePolicies []*adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) (sets.Set[string], error) { + targetNamespaces := sets.New[string]() + for _, erp := range externalRoutePolicies { + namespaces, err := m.listNamespacesBySelector(&erp.Spec.From.NamespaceSelector) + if err != nil { + return nil, err + } + for _, ns := range namespaces { + if targetNamespaces.Has(ns.Name) { + klog.Warningf("External gateway pod %s/%s targets namespace %s more than once", podName.Namespace, podName.Name, ns.Name) + continue + } + targetNamespaces = targetNamespaces.Insert(ns.Name) + } + } + return targetNamespaces, nil +} + +func (m *externalPolicyManager) findGatewayInfoForPodInTargetNamespace(key ktypes.NamespacedName, targetNamespace string, processedPolicies []*routePolicy) (*gatewayInfo, error) { + for _, p := range processedPolicies { + namespaces, err := m.listNamespacesBySelector(p.targetNamespacesSelector) + if err != nil { + return nil, err + } + for _, targetNs := range namespaces { + if targetNs.Name == targetNamespace { + return p.dynamicGateways[key], nil + } + } + } + return nil, fmt.Errorf("gateway information for pod %s/%s not found", key.Namespace, key.Name) +} + +// processDeletePod removes the gateway IP derived from the pod. The IP is then removed from all the pods found in the namespaces by the +// network client (north bound as logical static route or in conntrack). +func (m *externalPolicyManager) processDeletePod(pod *v1.Pod, namespaces sets.Set[string]) error { + return m.deletePodGatewayInNamespaces(pod, namespaces) +} + +func (m *externalPolicyManager) deletePodGatewayInNamespaces(pod *v1.Pod, targetNamespaces sets.Set[string]) error { + + for nsName := range targetNamespaces { + err := m.deletePodGatewayInNamespace(pod, nsName) + if err != nil { + return err + } + } + return nil +} + +func (m *externalPolicyManager) deletePodGatewayInNamespace(pod *v1.Pod, targetNamespace string) error { + + key := ktypes.NamespacedName{Namespace: pod.Namespace, Name: pod.Name} + cacheInfo, found := m.getNamespaceInfoFromCache(targetNamespace) + if !found { + klog.Warningf("Attempting to delete pod gateway %s/%s from a namespace that does not exist %s", pod.Namespace, pod.Name, targetNamespace) + return nil + } + defer m.unlockNamespaceInfoCache(targetNamespace) + gwInfo, ok := cacheInfo.dynamicGateways[key] + if !ok { + return fmt.Errorf("unable to find cached pod %s/%s external gateway information in namespace %s", pod.Namespace, pod.Name, targetNamespace) + } + annotatedGWIPs, err := m.calculateAnnotatedNamespaceGatewayIPsForNamespace(targetNamespace) + if err != nil { + return err + } + coexistingIPs, err := m.retrieveDynamicGatewayIPsForPolicies(cacheInfo.policies) + if err != nil { + return err + } + coexistingIPs = coexistingIPs.Union(annotatedGWIPs) + // Filter out the IPs that are not in coexisting. Those IPs are to be deleted. 
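+	// Worked example with hypothetical IPs: if this pod contributed gws = {10.0.0.1, 10.0.0.2}
+	// and a coexisting policy or annotation still references coexistingIPs = {10.0.0.2},
+	// then invalidGWIPs = {10.0.0.1} is removed from the network resource while
+	// ipsToKeep = {10.0.0.2} is left untouched.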
+
 invalidGWIPs := gwInfo.gws.Difference(coexistingIPs) + // The IPs to keep are the coexisting IPs minus those scheduled for deletion. + ipsToKeep := coexistingIPs.Difference(invalidGWIPs) + klog.Infof("Coexisting %s, invalid %s, ipsToKeep %s", strings.Join(sets.List(coexistingIPs), ","), strings.Join(sets.List(invalidGWIPs), ","), strings.Join(sets.List(ipsToKeep), ",")) + err = m.netClient.deleteGatewayIPs(targetNamespace, invalidGWIPs, ipsToKeep) + if err != nil { + return err + } + gwInfo.gws.Delete(invalidGWIPs.UnsortedList()...) + if cacheInfo.dynamicGateways[key].gws.Len() == 0 { + delete(cacheInfo.dynamicGateways, key) + } + return nil +} + +// applyGatewayInfoToPod applies the gateway IPs associated with the pod's namespace to the pod's logical routes +func (m *externalPolicyManager) applyGatewayInfoToPod(newPod *v1.Pod, static gatewayInfoList, dynamic map[ktypes.NamespacedName]*gatewayInfo) error { + err := m.netClient.addGatewayIPs(newPod, static) + if err != nil { + return err + } + for _, egress := range dynamic { + err := m.netClient.addGatewayIPs(newPod, gatewayInfoList{egress}) + if err != nil { + return err + } + } + return nil +} + +// getRoutePolicyForPodGateway iterates through the dynamic hops of a given external route policy spec to determine the pod's GW information. +// Note that a pod can match multiple policies with different configuration at the same time, with the condition +// that the pod can only target the same namespace once at most. That's a 1-1 pod to namespace match. +func (m *externalPolicyManager) getRoutePolicyForPodGateway(newPod *v1.Pod, externalRoutePolicy *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) (*routePolicy, error) { + + key := ktypes.NamespacedName{Namespace: newPod.Namespace, Name: newPod.Name} + + pp, err := m.processExternalRoutePolicy(externalRoutePolicy) + if err != nil { + return nil, err + } + if _, ok := pp.dynamicGateways[key]; !ok { + return nil, fmt.Errorf("pod %s not found while processing dynamic hops", key) + } + // store only the information needed + return &routePolicy{ + targetNamespacesSelector: pp.targetNamespacesSelector, + dynamicGateways: map[ktypes.NamespacedName]*gatewayInfo{key: pp.dynamicGateways[key]}, + }, nil + +} + +func getExGwPodIPs(gatewayPod *v1.Pod, networkName string) (sets.Set[string], error) { + if networkName != "" { + return getMultusIPsFromNetworkName(gatewayPod, networkName) + } + if gatewayPod.Spec.HostNetwork { + return getPodIPs(gatewayPod), nil + } + return nil, fmt.Errorf("ignoring pod %s as an external gateway candidate. 
Invalid combination "+ + "of host network: %t and routing-network annotation: %s", gatewayPod.Name, gatewayPod.Spec.HostNetwork, + networkName) +} + +func getPodIPs(pod *v1.Pod) sets.Set[string] { + foundGws := sets.New[string]() + for _, podIP := range pod.Status.PodIPs { + ip := utilnet.ParseIPSloppy(podIP.IP) + if ip != nil { + foundGws.Insert(ip.String()) + } + } + return foundGws +} + +func getMultusIPsFromNetworkName(pod *v1.Pod, networkName string) (sets.Set[string], error) { + foundGws := sets.New[string]() + var multusNetworks []nettypes.NetworkStatus + err := json.Unmarshal([]byte(pod.ObjectMeta.Annotations[nettypes.NetworkStatusAnnot]), &multusNetworks) + if err != nil { + return nil, fmt.Errorf("unable to unmarshal annotation on pod %s k8s.v1.cni.cncf.io/network-status '%s': %v", + pod.Name, pod.ObjectMeta.Annotations[nettypes.NetworkStatusAnnot], err) + } + for _, multusNetwork := range multusNetworks { + if multusNetwork.Name == networkName { + for _, gwIP := range multusNetwork.IPs { + ip := net.ParseIP(gwIP) + if ip != nil { + foundGws.Insert(ip.String()) + } + } + return foundGws, nil + } + } + return nil, fmt.Errorf("unable to find multus network %s in pod %s/%s", networkName, pod.Namespace, pod.Name) +} + +func (m *externalPolicyManager) filterNamespacesUsingPodGateway(key ktypes.NamespacedName) sets.Set[string] { + namespaces := sets.New[string]() + nsList := m.listNamespaceInfoCache() + for _, namespaceName := range nsList { + cacheInfo, found := m.getNamespaceInfoFromCache(namespaceName) + if !found { + continue + } + if _, ok := cacheInfo.dynamicGateways[key]; ok { + namespaces = namespaces.Insert(namespaceName) + } + m.unlockNamespaceInfoCache(namespaceName) + } + return namespaces +} + +func (m *externalPolicyManager) listPodsInNamespaceWithSelector(namespace string, selector *metav1.LabelSelector) ([]*v1.Pod, error) { + + s, err := metav1.LabelSelectorAsSelector(selector) + if err != nil { + return nil, err + } + return m.podLister.Pods(namespace).List(s) +} + +func containsNamespaceInSlice(nss []*v1.Namespace, podNs string) bool { + for _, ns := range nss { + if ns.Name == podNs { + return true + } + } + return false +} + +func containsPodInSlice(pods []*v1.Pod, podName string) bool { + for _, pod := range pods { + if pod.Name == podName { + return true + } + } + return false +} diff --git a/go-controller/pkg/ovn/controller/apbroute/external_controller_pod_test.go b/go-controller/pkg/ovn/controller/apbroute/external_controller_pod_test.go new file mode 100644 index 0000000000..690a1b7637 --- /dev/null +++ b/go-controller/pkg/ovn/controller/apbroute/external_controller_pod_test.go @@ -0,0 +1,500 @@ +package apbroute + +import ( + "context" + "reflect" + "strconv" + "time" + + . "github.com/onsi/ginkgo" + . 
"github.com/onsi/gomega" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" + libovsdbtest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/libovsdb" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/client-go/kubernetes/fake" +) + +var _ = Describe("OVN External Gateway policy", func() { + + var ( + namespaceDefault = &corev1.Namespace{ + ObjectMeta: v1.ObjectMeta{Name: "default", + Labels: map[string]string{"name": "default"}}} + namespaceTest = &corev1.Namespace{ + ObjectMeta: v1.ObjectMeta{Name: "test", + Labels: map[string]string{"name": "test", "match": "test", "multiple": "true"}}, + } + namespaceTest2 = &corev1.Namespace{ + ObjectMeta: v1.ObjectMeta{Name: "test2", + Labels: map[string]string{"name": "test2", "match": "test2", "multiple": "true"}}, + } + + dynamicPolicy = newPolicy( + "dynamic", + &v1.LabelSelector{MatchLabels: map[string]string{"name": "test"}}, + nil, + &v1.LabelSelector{MatchLabels: map[string]string{"name": "default"}}, + &v1.LabelSelector{MatchLabels: map[string]string{"key": "pod"}}, + false, + ) + + dynamicPolicyForTest2Only = newPolicy( + "policyForTest2", + &v1.LabelSelector{MatchLabels: map[string]string{"match": "test2"}}, + nil, + &v1.LabelSelector{MatchLabels: map[string]string{"name": "default"}}, + &v1.LabelSelector{MatchLabels: map[string]string{"duplicated": "true"}}, + false, + ) + + overlappingPolicy = newPolicy( + "overlapping", + &v1.LabelSelector{MatchLabels: map[string]string{"match": "test"}}, + nil, + &v1.LabelSelector{MatchLabels: map[string]string{"name": "default"}}, + &v1.LabelSelector{MatchLabels: map[string]string{"duplicated": "true"}}, + false, + ) + + multipleNamespacesPolicy = newPolicy( + "multipleNamespaces", + &v1.LabelSelector{MatchLabels: map[string]string{"multiple": "true"}}, + nil, + &v1.LabelSelector{MatchLabels: map[string]string{"name": "default"}}, + &v1.LabelSelector{MatchLabels: map[string]string{"key": "pod"}}, + false, + ) + + pod1 = newPod("pod_1", "default", "192.168.10.1", map[string]string{"key": "pod", "name": "pod1", "duplicated": "true"}) + pod2 = newPod("pod_2", "default", "192.168.20.1", map[string]string{"key": "pod", "name": "pod2"}) + pod3 = newPod("pod_3", "default", "192.168.30.1", map[string]string{"key": "pod", "name": "pod3"}) + ) + AfterEach(func() { + nbsbCleanup.Cleanup() + }) + + BeforeEach(func() { + initialDB = libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + }, + } + nbClient, _, nbsbCleanup, err = libovsdbtest.NewNBSBTestHarness(initialDB) + Expect(err).NotTo(HaveOccurred()) + stopChan = make(chan struct{}) + + }) + + var _ = Context("When adding a new pod", func() { + + It("processes the pod that is a pod gateway with multiples matching policies each in a different namespaces", func() { + + initController([]runtime.Object{namespaceDefault, namespaceTest, namespaceTest2}, []runtime.Object{multipleNamespacesPolicy}) + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(2)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + Equal( + &namespaceInfo{ + policies: sets.New(multipleNamespacesPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: 
map[types.NamespacedName]*gatewayInfo{}, + })) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest2.Name) }, 5).Should( + Equal( + &namespaceInfo{ + policies: sets.New(multipleNamespacesPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{}, + })) + _, err := fakeClient.CoreV1().Pods(pod1.Namespace).Create(context.Background(), pod1, v1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + Equal( + &namespaceInfo{ + policies: sets.New(multipleNamespacesPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod1.Name}: { + gws: sets.New(pod1.Status.PodIPs[0].IP), + }, + }, + })) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest2.Name) }, 5).Should( + Equal( + &namespaceInfo{ + policies: sets.New(multipleNamespacesPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod1.Name}: { + gws: sets.New(pod1.Status.PodIPs[0].IP), + }, + }})) + + }) + + It("processes the pod that has no policy match", func() { + noMatchPolicy := newPolicy( + "noMatchPolicy", + &v1.LabelSelector{MatchLabels: map[string]string{"match": "test"}}, + nil, + &v1.LabelSelector{MatchLabels: map[string]string{"name": "default"}}, + &v1.LabelSelector{MatchLabels: map[string]string{"key": "nomatch"}}, + false, + ) + initController([]runtime.Object{namespaceDefault, namespaceTest}, []runtime.Object{noMatchPolicy}) + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1)) + _, err := fakeClient.CoreV1().Pods(pod1.Namespace).Create(context.Background(), pod1, v1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + Equal( + &namespaceInfo{ + policies: sets.New(noMatchPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{}, + })) + + }) + + It("processes a pod gateway that matches two policies targeting the same namespace", func() { + + initController([]runtime.Object{namespaceDefault, namespaceTest, namespaceTest2}, []runtime.Object{overlappingPolicy, dynamicPolicy}) + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(2)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + Equal( + &namespaceInfo{ + policies: sets.New(overlappingPolicy.Name, dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{}, + })) + _, err := fakeClient.CoreV1().Pods(pod1.Namespace).Create(context.Background(), pod1, v1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + Equal( + &namespaceInfo{ + policies: sets.New(overlappingPolicy.Name, dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod1.Name}: { + gws: sets.New(pod1.Status.PodIPs[0].IP), + }, + }})) + + }) + }) + + var _ = Context("When deleting a pod", func() { + 
It("deletes a pod gateway that matches two policies, each targeting a different namespace", func() { + dynamicPolicyTest2 := newPolicy( + "dynamicTest2", + &v1.LabelSelector{MatchLabels: map[string]string{"name": "test2"}}, + nil, + &v1.LabelSelector{MatchLabels: map[string]string{"name": "default"}}, + &v1.LabelSelector{MatchLabels: map[string]string{"key": "pod"}}, + false, + ) + initController([]runtime.Object{namespaceDefault, namespaceTest, namespaceTest2, pod1}, []runtime.Object{dynamicPolicyTest2, dynamicPolicy}) + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(2)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(2)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod1.Name}: { + gws: sets.New(pod1.Status.PodIPs[0].IP), + }, + }, + })) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest2.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicyTest2.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod1.Name}: { + gws: sets.New(pod1.Status.PodIPs[0].IP), + }, + }, + })) + deletePod(pod1, fakeClient) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(2)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{}, + })) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest2.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicyTest2.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{}, + })) + }) + + It("deletes a pod that does not match any policy", func() { + noMatchPolicy := newPolicy( + "nomatch", + &v1.LabelSelector{MatchLabels: map[string]string{"match": "test"}}, + nil, + &v1.LabelSelector{MatchLabels: map[string]string{"name": "default"}}, + &v1.LabelSelector{MatchLabels: map[string]string{"key": "nomatch"}}, + false, + ) + initController([]runtime.Object{namespaceDefault, namespaceTest, pod1}, []runtime.Object{noMatchPolicy}) + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(noMatchPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{}, + })) + deletePod(pod1, fakeClient) + Eventually(func() bool { + _, err := fakeClient.CoreV1().Pods(pod1.Namespace).Get(context.Background(), pod1.Name, v1.GetOptions{}) + return apierrors.IsNotFound(err) + }).Should(BeTrue()) + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(noMatchPolicy.Name), + staticGateways: gatewayInfoList{}, + 
dynamicGateways: map[types.NamespacedName]*gatewayInfo{}, + })) + }) + + It("deletes a pod gateway that is one of two pods matching two policies that target the same namespace", func() { + initController([]runtime.Object{namespaceDefault, namespaceTest, pod1, pod2}, []runtime.Object{overlappingPolicy, dynamicPolicy}) + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(2)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + deletePod(pod1, fakeClient) + + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(overlappingPolicy.Name, dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod2.Name}: { + gws: sets.New(pod2.Status.PodIPs[0].IP), + }, + }, + })) + }) + }) + + var _ = Context("When updating a pod", func() { + It("updates an existing pod gateway to match an additional new policy targeting a new namespace", func() { + unmatchPod := newPod("unmatchPod", "default", "192.168.100.1", map[string]string{"name": "unmatchPod"}) + initController([]runtime.Object{namespaceDefault, namespaceTest, unmatchPod}, []runtime.Object{dynamicPolicy}) + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{}, + })) + updatePodLabels(unmatchPod, pod1.Labels, fakeClient) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: unmatchPod.Name}: { + gws: sets.New(unmatchPod.Status.PodIPs[0].IP), + }, + }, + })) + }) + + It("updates an existing pod gateway to match a new policy that targets the same namespace", func() { + + initController([]runtime.Object{namespaceDefault, namespaceTest, pod2}, []runtime.Object{overlappingPolicy, dynamicPolicy}) + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(2)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name, overlappingPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod2.Name}: { + gws: sets.New(pod2.Status.PodIPs[0].IP), + }, + }, + })) + updatePodLabels(pod2, map[string]string{"duplicated": "true"}, fakeClient) + // wait up to 2 seconds to ensure that the pod changes have been reconciled. We do this because the outcome of the change should not impact the list of dynamic IPs and + // there is no way to know which of the policies applies specifically to the pod. 
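+			// the poll below only confirms that the fake client has observed the label
+			// update; the cache assertion that follows verifies the namespace info is unchanged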
+ Eventually(func() bool { + p, err := fakeClient.CoreV1().Pods(pod2.Namespace).Get(context.TODO(), pod2.Name, v1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + return reflect.DeepEqual(p.Labels, map[string]string{"duplicated": "true"}) + }, 2, 2).Should(BeTrue()) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name, overlappingPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod2.Name}: { + gws: sets.New(pod2.Status.PodIPs[0].IP), + }, + }, + })) + }) + + It("updates an existing pod gateway to match a new policy that targets a different namespace", func() { + + initController([]runtime.Object{namespaceDefault, namespaceTest, namespaceTest2, pod2, pod3}, []runtime.Object{dynamicPolicyForTest2Only, dynamicPolicy}) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(2)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod2.Name}: { + gws: sets.New(pod2.Status.PodIPs[0].IP), + }, + {Namespace: "default", Name: pod3.Name}: { + gws: sets.New(pod3.Status.PodIPs[0].IP), + }, + }, + })) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest2.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicyForTest2Only.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{}, + })) + updatePodLabels(pod2, map[string]string{"duplicated": "true"}, fakeClient) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod3.Name}: { + gws: sets.New(pod3.Status.PodIPs[0].IP), + }, + }, + })) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest2.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicyForTest2Only.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod2.Name}: { + gws: sets.New(pod2.Status.PodIPs[0].IP), + }, + }, + })) + }) + It("updates an existing pod gateway to match no policies", func() { + initController([]runtime.Object{namespaceDefault, namespaceTest, pod1, pod2}, []runtime.Object{dynamicPolicy}) + Eventually(func() []string { return listRoutePolicyInCache() }, time.Minute).Should(HaveLen(1)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod1.Name}: { + gws: sets.New(pod1.Status.PodIPs[0].IP), + }, + {Namespace: "default", Name: pod2.Name}: { + gws: sets.New(pod2.Status.PodIPs[0].IP), + }, + }, + })) + updatePodLabels(pod1, map[string]string{}, fakeClient) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5, 1).Should(Equal( + &namespaceInfo{ + 
policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod2.Name}: { + gws: sets.New(pod2.Status.PodIPs[0].IP), + }, + }, + })) + }) + + It("updates a pod so that it matches a policy in only a single namespace", func() { + initController([]runtime.Object{namespaceDefault, namespaceTest, namespaceTest2, pod1}, []runtime.Object{dynamicPolicyForTest2Only, dynamicPolicy}) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(2)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod1.Name}: { + gws: sets.New(pod1.Status.PodIPs[0].IP), + }, + }, + })) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest2.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicyForTest2Only.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod1.Name}: { + gws: sets.New(pod1.Status.PodIPs[0].IP), + }, + }, + })) + updatePodLabels(pod1, map[string]string{"key": "pod"}, fakeClient) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod1.Name}: { + gws: sets.New(pod1.Status.PodIPs[0].IP), + }, + }, + })) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest2.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicyForTest2Only.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{}, + })) + }) + + }) +}) + +func deletePod(pod *corev1.Pod, fakeClient *fake.Clientset) { + + err = fakeClient.CoreV1().Pods(pod.Namespace).Delete(context.Background(), pod.Name, v1.DeleteOptions{}) + Expect(err).NotTo(HaveOccurred()) +} + +func updatePodLabels(pod *corev1.Pod, newLabels map[string]string, fakeClient *fake.Clientset) { + + p, err := fakeClient.CoreV1().Pods(pod.Namespace).Get(context.TODO(), pod.Name, v1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + incrementResourceVersion(p) + p.Labels = newLabels + _, err = fakeClient.CoreV1().Pods(pod.Namespace).Update(context.Background(), p, v1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) +} + +func incrementResourceVersion(obj v1.Object) { + var rs int64 + if obj.GetResourceVersion() != "" { + rs, err = strconv.ParseInt(obj.GetResourceVersion(), 10, 64) + Expect(err).NotTo(HaveOccurred()) + } + rs++ + obj.SetResourceVersion(strconv.FormatInt(rs, 10)) +} diff --git a/go-controller/pkg/ovn/controller/apbroute/external_controller_policy.go b/go-controller/pkg/ovn/controller/apbroute/external_controller_policy.go new file mode 100644 index 0000000000..895c02406a --- /dev/null +++ b/go-controller/pkg/ovn/controller/apbroute/external_controller_policy.go @@ -0,0 +1,722 @@ +package apbroute + +import ( + "fmt" + "net" + "reflect" + "strings" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + ktypes "k8s.io/apimachinery/pkg/types" + kerrors "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/apimachinery/pkg/util/sets" + 
"k8s.io/klog/v2" + + adminpolicybasedrouteapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" +) + +// processAddPolicy takes in a new policy and applies it. To do that, it aggregates the IPs from the static hops and retrieves the IPs from the pods resulting from applying the +// namespace and pod selectors in the dynamic hops. +// The last step is to store the new policy in the route policy cache so that it can be used in the future to compare against changes in its spec. +func (m *externalPolicyManager) processAddPolicy(routePolicy *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) (*routePolicy, error) { + + // it's a new policy + processedPolicies, err := m.processExternalRoutePolicy(routePolicy) + if err != nil { + return nil, err + } + err = m.applyProcessedPolicy(routePolicy.Name, processedPolicies) + if err != nil { + return nil, err + } + err = m.storeRoutePolicyInCache(routePolicy) + if err != nil { + return nil, err + } + klog.Infof("Added Admin Policy Based External Route %s", routePolicy.Name) + return processedPolicies, nil +} + +// applyProcessedPolicy takes in a route policy and applies it to each of the namespaces defined in the namespaces selector in the route policy. +// As part of the process, it also updates the namespace info cache with the new gatway information derived from the route policy, so that it keeps +// track for each namespace of the gateway IPs that are being applied and the names of the policies impacting the namespace. +func (m *externalPolicyManager) applyProcessedPolicy(policyName string, routePolicy *routePolicy) error { + targetNs, err := m.listNamespacesBySelector(routePolicy.targetNamespacesSelector) + if err != nil { + return err + } + for _, ns := range targetNs { + cacheInfo, found := m.getNamespaceInfoFromCache(ns.Name) + if !found { + cacheInfo = m.newNamespaceInfoInCache(ns.Name) + } + err = m.applyProcessedPolicyToNamespace(ns.Name, policyName, routePolicy, cacheInfo) + m.unlockNamespaceInfoCache(ns.Name) + if err != nil { + return err + } + } + return nil +} + +// processDeletePolicy takes in a policy, marks it for deletion and proceeds to delete the gateway IPs derived from the static and dynamic hops from the namespaces impacted by the policy, as defined by the namespace +// selector in the from field. The last step is to delete it from the cache. +func (m *externalPolicyManager) processDeletePolicy(policyName string) error { + + // mark the policy for deletion. + // if it's already marked continue processing the delete action as this could be a retry attempt from a previous failed delete run. 
+
 // if it's no longer in the cache, return nil + routePolicy, found := m.getAndMarkRoutePolicyForDeletionInCache(policyName) + if !found { + return nil + } + targetNs, err := m.listNamespacesBySelector(&routePolicy.Spec.From.NamespaceSelector) + if err != nil { + return err + } + for _, ns := range targetNs { + cacheInfo, found := m.getNamespaceInfoFromCache(ns.Name) + if !found { + klog.Warningf("Attempting to delete policy %s from a namespace that does not exist %s", routePolicy.Name, ns.Name) + continue + } + err = m.removePolicyFromNamespace(ns.Name, &routePolicy, cacheInfo) + if err != nil { + m.unlockNamespaceInfoCache(ns.Name) + return err + } + if cacheInfo.policies.Len() == 0 { + m.deleteNamespaceInfoInCache(ns.Name) + } + m.unlockNamespaceInfoCache(ns.Name) + } + err = m.deleteRoutePolicyFromCache(routePolicy.Name) + if err != nil { + return err + } + klog.Infof("Deleted Admin Policy Based External Route %s", routePolicy.Name) + return nil +} + +// calculateAnnotatedNamespaceGatewayIPsForNamespace retrieves the list of IPs defined by the legacy annotation gateway logic for namespaces. +// this function is used when deleting gateway IPs to ensure that IPs that overlap with the annotation logic are not removed from the network resource +// (north bound or conntrack) when a policy that references them is deleted. +func (m *externalPolicyManager) calculateAnnotatedNamespaceGatewayIPsForNamespace(targetNamespace string) (sets.Set[string], error) { + namespace, err := m.namespaceLister.Get(targetNamespace) + if err != nil { + return nil, err + } + + if annotation, ok := namespace.Annotations[util.RoutingExternalGWsAnnotation]; ok { + exGateways, err := util.ParseRoutingExternalGWAnnotation(annotation) + if err != nil { + return nil, err + } + return exGateways, nil + } + return sets.New[string](), nil + +} + +// calculateAnnotatedPodGatewayIPsForNamespace retrieves the list of IPs defined by the legacy annotation gateway logic for pods. +// this function is used when deleting gateway IPs to ensure that IPs that overlap with the annotation logic are not removed from the network resource +// (north bound or conntrack) when a policy that references them is deleted. +func (m *externalPolicyManager) calculateAnnotatedPodGatewayIPsForNamespace(targetNamespace string) (sets.Set[string], error) { + gwIPs := sets.New[string]() + podList, err := m.podLister.List(labels.Everything()) + if err != nil { + return nil, err + } + + for _, pod := range podList { + networkName, ok := pod.Annotations[util.RoutingNetworkAnnotation] + if !ok { + continue + } + targetNamespaces, ok := pod.Annotations[util.RoutingNamespaceAnnotation] + if !ok { + continue + } + foundGws, err := getExGwPodIPs(pod, networkName) + if err != nil { + klog.Errorf("Error getting exgw IPs for pod: %s, error: %v", pod.Name, err) + return nil, err + } + if foundGws.Len() == 0 { + klog.Errorf("No pod IPs found for pod %s/%s", pod.Namespace, pod.Name) + continue + } + tmpNs := sets.New(strings.Split(targetNamespaces, ",")...) + if tmpNs.Has(targetNamespace) { + gwIPs = gwIPs.Union(foundGws) + } + } + return gwIPs, nil +} + +// deletePolicyInNamespace removes the gateway IPs derived from a policy in a namespace. It takes into account the gateway IPs from the legacy +// annotations and other policies impacting the same namespace to avoid deleting IPs that coexist in other resources. 
+
// In a nutshell, if a gateway IP is only found in the policy being deleted, then the IP is removed from the network resource. But if the IP is +// found in at least one legacy annotation or another policy impacting the namespace, then the IP is not removed from the cache or the network resource (north bound or conntrack). +func (m *externalPolicyManager) deletePolicyInNamespace(namespaceName, policyName string, routePolicy *routePolicy, cacheInfo *namespaceInfo) error { + coexistingPolicies := cacheInfo.policies.Clone().Delete(policyName) + annotatedGWIPs, err := m.calculateAnnotatedNamespaceGatewayIPsForNamespace(namespaceName) + if err != nil { + return err + } + coexistingIPs, err := m.retrieveStaticGatewayIPsForPolicies(coexistingPolicies) + if err != nil { + return err + } + + // don't care if the route is flagged for deletion, delete any gw IPs related to the policy + policy, found, _ := m.getRoutePolicyFromCache(policyName) + if !found { + return fmt.Errorf("policy %s not found", policyName) + } + pp, err := m.processExternalRoutePolicy(&policy) + if err != nil { + return err + } + + static := sets.New[string]() + for _, gatewayInfo := range pp.staticGateways { + static = static.Union(gatewayInfo.gws) + } + for _, gwInfo := range routePolicy.staticGateways { + static = static.Delete(gwInfo.gws.UnsortedList()...) + } + coexistingIPs = coexistingIPs.Union(annotatedGWIPs).Union(static) + + for _, gwInfo := range routePolicy.staticGateways { + // Filter out the IPs that are not in coexisting. Those IPs are to be deleted. + invalidGWIPs := gwInfo.gws.Difference(coexistingIPs) + // The IPs to keep are the coexisting IPs minus those scheduled for deletion. + ipsToKeep := coexistingIPs.Difference(invalidGWIPs) + klog.Infof("Coexisting %s, invalid %s, ipsToKeep %s", strings.Join(sets.List(coexistingIPs), ","), strings.Join(sets.List(invalidGWIPs), ","), strings.Join(sets.List(ipsToKeep), ",")) + err := m.netClient.deleteGatewayIPs(namespaceName, invalidGWIPs, ipsToKeep) + if err != nil { + return err + } + if gwInfo.gws.Equal(invalidGWIPs) { + cacheInfo.staticGateways = cacheInfo.staticGateways.Delete(gwInfo) + continue + } + gwInfo.gws = gwInfo.gws.Delete(invalidGWIPs.UnsortedList()...) + } + + annotatedGWIPs, err = m.calculateAnnotatedPodGatewayIPsForNamespace(namespaceName) + if err != nil { + return err + } + + coexistingIPs, err = m.retrieveDynamicGatewayIPsForPolicies(coexistingPolicies) + if err != nil { + return err + } + + dynamic := sets.New[string]() + for _, gatewayInfo := range pp.dynamicGateways { + dynamic = dynamic.Union(gatewayInfo.gws) + } + for _, gwInfo := range routePolicy.dynamicGateways { + dynamic = dynamic.Delete(gwInfo.gws.UnsortedList()...) + } + coexistingIPs = coexistingIPs.Union(annotatedGWIPs).Union(dynamic) + + for pod, gwInfo := range routePolicy.dynamicGateways { + // Filter out the IPs that are not in coexisting. Those IPs are to be deleted. + invalidGWIPs := gwInfo.gws.Difference(coexistingIPs) + // The IPs to keep are the coexisting IPs minus those scheduled for deletion. 
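+		// As in the static case above, only the IPs contributed exclusively by this
+		// policy's dynamic hops are deleted; anything still referenced by a coexisting
+		// policy or legacy annotation survives the deletion.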
+
 ipsToKeep := coexistingIPs.Difference(invalidGWIPs) + klog.Infof("Coexisting %s, invalid %s, ipsToKeep %s", strings.Join(sets.List(coexistingIPs), ","), strings.Join(sets.List(invalidGWIPs), ","), strings.Join(sets.List(ipsToKeep), ",")) + err := m.netClient.deleteGatewayIPs(namespaceName, invalidGWIPs, ipsToKeep) + if err != nil { + return err + } + if gwInfo.gws.Equal(invalidGWIPs) { + // delete cached information for the pod gateway + delete(cacheInfo.dynamicGateways, pod) + continue + } + gwInfo.gws = gwInfo.gws.Delete(invalidGWIPs.UnsortedList()...) + } + return nil +} + +// applyProcessedPolicyToNamespace applies the gateway IPs derived from the processed policy to a namespace and updates the cache information for the namespace. +func (m *externalPolicyManager) applyProcessedPolicyToNamespace(namespaceName, policyName string, routePolicy *routePolicy, cacheInfo *namespaceInfo) error { + + if routePolicy.staticGateways.Len() > 0 { + err := m.addGWRoutesForNamespace(namespaceName, routePolicy.staticGateways) + if err != nil { + return err + } + var duplicated sets.Set[string] + cacheInfo.staticGateways, duplicated = cacheInfo.staticGateways.Insert(routePolicy.staticGateways...) + if duplicated.Len() > 0 { + klog.Warningf("Found duplicated gateway IP(s) %s in policy %s", sets.List(duplicated), policyName) + } + } + for pod, info := range routePolicy.dynamicGateways { + err := m.addGWRoutesForNamespace(namespaceName, gatewayInfoList{info}) + if err != nil { + return err + } + cacheInfo.dynamicGateways[pod] = info + } + cacheInfo.policies = cacheInfo.policies.Insert(policyName) + return nil +} + +// processUpdatePolicy takes in the current and updated version of a given policy and applies the following logic: +// * Determine the changes between the current and updated version. +// * Remove the static and dynamic hop entries in the namespaces impacted by the current version of the policy that are in the current policy but not in the updated version. +// * Apply the static and dynamic hop entries in the namespaces impacted by the updated version of the policy that are in the updated version but not in the current version. +// * Store the updated policy in the route policy cache. +func (m *externalPolicyManager) processUpdatePolicy(currentPolicy, updatedPolicy *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) (*routePolicy, error) { + klog.Infof("Processing update for Admin Policy Based External Route '%s'", currentPolicy.Name) + + // To update the policies, first we'll process the diff between old and new and remove the discrepancies that are not found in the new object. + // Afterwards, we'll process the diff between the new and the old and apply the new policies not found in the old policy, ensuring that we are not duplicating the gatewayInfo. + err := m.removeDiscrepanciesInRoutePolicy(currentPolicy, updatedPolicy) + if err != nil { + return nil, err + } + // At this point we have removed all the aspects of the current policy that no longer apply. The next step is to apply the parts of the new policy that are not in the current one. 
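+	// Sketch of the two-phase reconciliation in set terms, with C = current policy
+	// and U = updated policy:
+	//   phase 1 (above) removed C.namespaces \ U.namespaces and C.hops \ U.hops
+	//   phase 2 (below) adds    U.namespaces \ C.namespaces and U.hops \ C.hops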
err = m.applyUpdatesInRoutePolicy(currentPolicy, updatedPolicy) + if err != nil { + return nil, err + } + + // update the cache to ensure it reflects the latest copy + err = m.storeRoutePolicyInCache(updatedPolicy) + if err != nil { + return nil, err + } + klog.Infof("Updated Admin Policy Based External Route %s", currentPolicy.Name) + return m.processExternalRoutePolicy(updatedPolicy) +} + +func (m *externalPolicyManager) applyUpdatesInRoutePolicy(currentPolicy, newPolicy *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) error { + additionalNamespaces, additionalStaticHops, additionalDynamicHops, err := m.calculatePolicyDifferences(newPolicy, currentPolicy) + if err != nil { + return err + } + // apply the new policy to the new namespaces where the policy now applies + for additionalNs := range additionalNamespaces { + cacheInfo, found := m.getNamespaceInfoFromCache(additionalNs) + if !found { + // if not found create a new one + cacheInfo = m.newNamespaceInfoInCache(additionalNs) + } + err := m.applyPolicyToNamespace(additionalNs, newPolicy, cacheInfo) + m.unlockNamespaceInfoCache(additionalNs) + if err != nil { + return err + } + } + + processedStaticHops, err := m.processStaticHopsGatewayInformation(additionalStaticHops) + if err != nil { + return err + } + processedDynamicHops, err := m.processDynamicHopsGatewayInformation(additionalDynamicHops) + if err != nil { + return err + } + // retrieve all new namespaces + nsList, err := m.listNamespacesBySelector(&newPolicy.Spec.From.NamespaceSelector) + if err != nil { + return err + } + for _, ns := range nsList { + if additionalNamespaces.Has(ns.Name) { + // policy has already been fully applied to this namespace by the previous operation + continue + } + cacheInfo, found := m.getNamespaceInfoFromCache(ns.Name) + if !found { + cacheInfo = m.newNamespaceInfoInCache(ns.Name) + } + err = m.applyProcessedPolicyToNamespace(ns.Name, currentPolicy.Name, &routePolicy{dynamicGateways: processedDynamicHops, staticGateways: processedStaticHops}, cacheInfo) + m.unlockNamespaceInfoCache(ns.Name) + if err != nil { + return err + } + } + + return nil +} + +func (m *externalPolicyManager) removeDiscrepanciesInRoutePolicy(currentPolicy, updatedPolicy *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) error { + unmatchingNamespaces, unmatchingStaticHops, unmatchingDynamicHops, err := m.calculatePolicyDifferences(currentPolicy, updatedPolicy) + if err != nil { + return err + } + // delete the namespaces where this policy no longer applies + for unmatchNs := range unmatchingNamespaces { + cacheInfo, found := m.getNamespaceInfoFromCache(unmatchNs) + if !found { + klog.Warningf("Attempting to delete policy %s from a namespace that does not exist %s", currentPolicy.Name, unmatchNs) + continue + } + err := m.removePolicyFromNamespace(unmatchNs, currentPolicy, cacheInfo) + if err != nil { + m.unlockNamespaceInfoCache(unmatchNs) + return err + } + if cacheInfo.policies.Len() == 0 { + m.deleteNamespaceInfoInCache(unmatchNs) + } + m.unlockNamespaceInfoCache(unmatchNs) + } + + // delete the hops that no longer apply from all the current policy's applicable namespaces + processedStaticHops, err := m.processStaticHopsGatewayInformation(unmatchingStaticHops) + if err != nil { + return err + } + processedDynamicHops, err := m.processDynamicHopsGatewayInformation(unmatchingDynamicHops) + if err != nil { + return err + } + // retrieve all current namespaces + nsList, err := m.listNamespacesBySelector(&currentPolicy.Spec.From.NamespaceSelector) + if err != nil { + 
return err + } + for _, ns := range nsList { + if unmatchingNamespaces.Has(ns.Name) { + // policy has already been deleted in this namespace by the previous operation + continue + } + cacheInfo, found := m.getNamespaceInfoFromCache(ns.Name) + if !found { + klog.Warningf("Attempting to update policy %s for a namespace that does not exist %s", currentPolicy.Name, ns.Name) + continue + } + err = m.deletePolicyInNamespace(ns.Name, currentPolicy.Name, &routePolicy{dynamicGateways: processedDynamicHops, staticGateways: processedStaticHops}, cacheInfo) + if err != nil { + m.unlockNamespaceInfoCache(ns.Name) + return err + } + if cacheInfo.policies.Len() == 0 { + m.deleteNamespaceInfoInCache(ns.Name) + } + m.unlockNamespaceInfoCache(ns.Name) + } + return nil +} + +// addGWRoutesForNamespace handles adding routes for all existing pods in namespace +func (m *externalPolicyManager) addGWRoutesForNamespace(namespace string, egress gatewayInfoList) error { + existingPods, err := m.podLister.Pods(namespace).List(labels.Everything()) + if err != nil { + return fmt.Errorf("failed to get all the pods (%v)", err) + } + for _, pod := range existingPods { + err := m.netClient.addGatewayIPs(pod, egress) + if err != nil { + return err + } + } + return nil +} + +func (m *externalPolicyManager) processStaticHopsGatewayInformation(hops []*adminpolicybasedrouteapi.StaticHop) (gatewayInfoList, error) { + gwList := gatewayInfoList{} + + // collect all the static gateway information from the nextHops slice + for _, h := range hops { + ip := net.ParseIP(h.IP) + if ip == nil { + return nil, fmt.Errorf("could not parse routing external gw annotation value '%s'", h.IP) + } + gwList = append(gwList, &gatewayInfo{gws: sets.New(ip.String()), bfdEnabled: h.BFDEnabled}) + } + return gwList, nil +} + +func (m *externalPolicyManager) processDynamicHopsGatewayInformation(hops []*adminpolicybasedrouteapi.DynamicHop) (map[ktypes.NamespacedName]*gatewayInfo, error) { + podsInfo := map[ktypes.NamespacedName]*gatewayInfo{} + for _, h := range hops { + podNS, err := m.listNamespacesBySelector(h.NamespaceSelector) + if err != nil { + return nil, err + } + for _, ns := range podNS { + s, err := metav1.LabelSelectorAsSelector(&h.PodSelector) + if err != nil { + return nil, err + } + pods, err := m.podLister.Pods(ns.Name).List(s) + if err != nil { + return nil, err + } + for _, pod := range pods { + foundGws, err := getExGwPodIPs(pod, h.NetworkAttachmentName) + if err != nil { + return nil, err + } + // if we found any gateways then we need to update current pods routing in the relevant namespace + if len(foundGws) == 0 { + klog.Warningf("No valid gateway IPs found for requested external gateway pod %s/%s", pod.Namespace, pod.Name) + continue + } + key := ktypes.NamespacedName{Namespace: pod.Namespace, Name: pod.Name} + if _, ok := podsInfo[key]; ok { + klog.Warningf("Found overlapping dynamic hop policy for pod %s, discarding match entry", key) + continue + } + podsInfo[key] = &gatewayInfo{gws: foundGws, bfdEnabled: h.BFDEnabled} + } + } + } + return podsInfo, nil +} + +func (m *externalPolicyManager) processExternalRoutePolicy(policy *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) (*routePolicy, error) { + var ( + errors []error + ) + staticGWInfo, err := m.processStaticHopsGatewayInformation(policy.Spec.NextHops.StaticHops) + if err != nil { + errors = append(errors, err) + } + + dynamicGWInfo, err := m.processDynamicHopsGatewayInformation(policy.Spec.NextHops.DynamicHops) + if err != nil { + errors = append(errors, err) + } + 
if len(errors) > 0 { + return nil, kerrors.NewAggregate(errors) + } + return &routePolicy{ + targetNamespacesSelector: &policy.Spec.From.NamespaceSelector, + staticGateways: staticGWInfo, + dynamicGateways: dynamicGWInfo, + }, nil + +} + +func (m *externalPolicyManager) processExternalRoutePolicies(externalRoutePolicies []*adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) ([]*routePolicy, error) { + routePolicies := make([]*routePolicy, 0) + for _, erp := range externalRoutePolicies { + processedPolicies, err := m.processExternalRoutePolicy(erp) + if err != nil { + return nil, err + } + routePolicies = append(routePolicies, processedPolicies) + } + return routePolicies, nil +} + +func (m *externalPolicyManager) findMatchingDynamicPolicies(pod *v1.Pod) ([]*adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute, error) { + var routePolicies []*adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute + crs, err := m.routeLister.List(labels.Everything()) + if err != nil { + return nil, err + } + for _, cr := range crs { + policySpec := adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteSpec{ + From: cr.Spec.From, + NextHops: adminpolicybasedrouteapi.ExternalNextHops{DynamicHops: []*adminpolicybasedrouteapi.DynamicHop{}}} + for _, dp := range cr.Spec.NextHops.DynamicHops { + nss, err := m.listNamespacesBySelector(dp.NamespaceSelector) + if err != nil { + return nil, err + } + if !containsNamespaceInSlice(nss, pod.Namespace) { + continue + } + nsPods, err := m.listPodsInNamespaceWithSelector(pod.Namespace, &dp.PodSelector) + if err != nil { + return nil, err + } + if containsPodInSlice(nsPods, pod.Name) { + // add only the hop information that intersects with the pod + policySpec.NextHops.DynamicHops = append(policySpec.NextHops.DynamicHops, dp) + } + } + if len(policySpec.NextHops.DynamicHops) > 0 { + routePolicies = append(routePolicies, &adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute{ + ObjectMeta: metav1.ObjectMeta{ + Name: cr.Name, + }, + Spec: policySpec, + }) + } + + } + return routePolicies, nil +} + +func (m *externalPolicyManager) getPoliciesForNamespace(namespaceName string) (sets.Set[string], error) { + matches := sets.New[string]() + policies, err := m.routeLister.List(labels.Everything()) + if err != nil { + return nil, err + } + + for _, policy := range policies { + targetNamespaces, err := m.listNamespacesBySelector(&policy.Spec.From.NamespaceSelector) + if err != nil { + return nil, err + } + for _, ns := range targetNamespaces { + if namespaceName == ns.Name { + matches = matches.Insert(policy.Name) + } + } + } + + return matches, nil +} + +func (m *externalPolicyManager) aggregateDynamicRouteGatewayInformation(pod *v1.Pod, routePolicy *routePolicy) (map[string]*gatewayInfo, error) { + key := ktypes.NamespacedName{Namespace: pod.Namespace, Name: pod.Name} + gwInfoMap := make(map[string]*gatewayInfo) + targetNs, err := m.listNamespacesBySelector(routePolicy.targetNamespacesSelector) + if err != nil { + return nil, err + } + for _, ns := range targetNs { + if _, ok := gwInfoMap[ns.Name]; ok { + return nil, fmt.Errorf("duplicated target namespace '%s ' while processing external policies for pod %s/%s", ns.Name, pod.Namespace, pod.Name) + } + gwInfoMap[ns.Name] = routePolicy.dynamicGateways[key] + } + return gwInfoMap, nil +} + +// calculatePolicyDifferences determines the differences between two policies in terms of namespaces where the policy applies, and the differences in static and dynamic hops. 
+// The return values are the namespaces, static hops and dynamic hops that are in the first policy but not in the second instance. +func (m *externalPolicyManager) calculatePolicyDifferences(policy1, policy2 *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) (sets.Set[string], []*adminpolicybasedrouteapi.StaticHop, []*adminpolicybasedrouteapi.DynamicHop, error) { + mismatchingNamespaces, err := m.calculateNamespaceSelectorDifferences(&policy1.Spec.From.NamespaceSelector, &policy2.Spec.From.NamespaceSelector) + if err != nil { + return nil, nil, nil, err + } + mismatchingStaticHops := m.calculateStaticHopDifferences(policy1.Spec.NextHops.StaticHops, policy2.Spec.NextHops.StaticHops) + mismatchingDynamicHops, err := m.calculateDynamicHopDifferences(policy1.Spec.NextHops.DynamicHops, policy2.Spec.NextHops.DynamicHops) + if err != nil { + return nil, nil, nil, err + } + + return mismatchingNamespaces, mismatchingStaticHops, mismatchingDynamicHops, nil +} + +// calculateNamespaceSelectorDifferences determines the difference between the first and the second selector. The outcome is a set that contains +// those namespace names that are in the first selector but not found in the second selector. +func (m *externalPolicyManager) calculateNamespaceSelectorDifferences(nsSelector1, nsSelector2 *metav1.LabelSelector) (sets.Set[string], error) { + unmatchingNamespaces := sets.New[string]() + if !reflect.DeepEqual(nsSelector1, nsSelector2) { + nsList1, err := m.listNamespacesBySelector(nsSelector1) + if err != nil { + return nil, err + } + nsList2, err := m.listNamespacesBySelector(nsSelector2) + if err != nil { + return nil, err + } + for _, ns1 := range nsList1 { + var found bool + for _, ns2 := range nsList2 { + if ns1.Name == ns2.Name { + found = true + break + } + } + if !found { + unmatchingNamespaces.Insert(ns1.Name) + } + } + } + return unmatchingNamespaces, nil +} + +// calculateStaticHopDifferences determines the difference between the first slice and the second staticHops slice. The outcome is a slice +// of static hops that are in the staticHop1 slice but not in the staticHop2 slice. +func (m *externalPolicyManager) calculateStaticHopDifferences(staticHops1, staticHops2 []*adminpolicybasedrouteapi.StaticHop) []*adminpolicybasedrouteapi.StaticHop { + diffStatic := make([]*adminpolicybasedrouteapi.StaticHop, 0) + for _, staticHop1 := range staticHops1 { + var found bool + for _, staticHop2 := range staticHops2 { + if reflect.DeepEqual(staticHop1, staticHop2) { + found = true + break + } + } + if !found { + diffStatic = append(diffStatic, staticHop1) + } + } + return diffStatic +} + +// calculateDynamicHopDifferences determines the difference between the first slice and the second dynamicHop slice. The return value is a slice +// of dynamic hops that are in the first slice but not in the second. +func (m *externalPolicyManager) calculateDynamicHopDifferences(dynamicHops1, dynamicHops2 []*adminpolicybasedrouteapi.DynamicHop) ([]*adminpolicybasedrouteapi.DynamicHop, error) { + diffDynamic := make([]*adminpolicybasedrouteapi.DynamicHop, 0) + for _, dynamicHop1 := range dynamicHops1 { + var found bool + for _, dynamicHop2 := range dynamicHops2 { + + if reflect.DeepEqual(dynamicHop1, dynamicHop2) { + found = true + break + } + } + if !found { + diffDynamic = append(diffDynamic, dynamicHop1) + } + } + return diffDynamic, nil +} + +// retrieveDynamicGatewayIPsForPolicies returns all the gateway IPs from the dynamic hops of all the policies in the set. 
This function is used +// to retrieve the dynamic gateway IPs from all the policies applicable to a specific namespace. +func (m *externalPolicyManager) retrieveDynamicGatewayIPsForPolicies(coexistingPolicies sets.Set[string]) (sets.Set[string], error) { + coexistingDynamicIPs := sets.New[string]() + + for name := range coexistingPolicies { + policy, err := m.routeLister.Get(name) + if err != nil { + klog.Warningf("Unable to find route policy %s:%+v", name, err) + continue + } + pp, err := m.processDynamicHopsGatewayInformation(policy.Spec.NextHops.DynamicHops) + if err != nil { + return nil, err + } + for _, gatewayInfo := range pp { + coexistingDynamicIPs = coexistingDynamicIPs.Union(gatewayInfo.gws) + } + } + return coexistingDynamicIPs, nil +} + +// retrieveStaticGatewayIPsForPolicies returns all the gateway IPs from the static hops of all the policies in the set. This function is used +// to retrieve the static gateway IPs from all the policies applicable to a specific namespace. +func (m *externalPolicyManager) retrieveStaticGatewayIPsForPolicies(policies sets.Set[string]) (sets.Set[string], error) { + coexistingStaticIPs := sets.New[string]() + + for name := range policies { + policy, err := m.routeLister.Get(name) + if err != nil { + klog.Warningf("Unable to find route policy %s:%+v", name, err) + continue + } + pp, err := m.processStaticHopsGatewayInformation(policy.Spec.NextHops.StaticHops) + if err != nil { + return nil, err + } + for _, gatewayInfo := range pp { + coexistingStaticIPs = coexistingStaticIPs.Union(gatewayInfo.gws) + } + } + return coexistingStaticIPs, nil +} diff --git a/go-controller/pkg/ovn/controller/apbroute/external_controller_policy_test.go b/go-controller/pkg/ovn/controller/apbroute/external_controller_policy_test.go new file mode 100644 index 0000000000..ce5b56a4e1 --- /dev/null +++ b/go-controller/pkg/ovn/controller/apbroute/external_controller_policy_test.go @@ -0,0 +1,723 @@ +package apbroute + +import ( + "context" + "sort" + "time" + + . "github.com/onsi/ginkgo" + . 
"github.com/onsi/gomega" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" + + corev1 "k8s.io/api/core/v1" + "k8s.io/client-go/kubernetes/fake" + + libovsdbclient "github.com/ovn-org/libovsdb/client" + adminpolicybasedrouteapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + adminpolicybasedrouteclient "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" + addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" + libovsdbtest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/libovsdb" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + "k8s.io/apimachinery/pkg/runtime" +) + +func newPod(podName, namespace, hostIP string, labels map[string]string) *corev1.Pod { + return &corev1.Pod{ + ObjectMeta: v1.ObjectMeta{Name: podName, Namespace: namespace, + Labels: labels}, + Spec: corev1.PodSpec{HostNetwork: true}, + Status: corev1.PodStatus{PodIPs: []corev1.PodIP{{IP: hostIP}}, Phase: corev1.PodRunning}, + } +} + +func listRoutePolicyInCache() []string { + return externalController.mgr.routePolicySyncCache.GetKeys() +} + +var ( + externalController *ExternalGatewayMasterController + iFactory *factory.WatchFactory + stopChan chan (struct{}) + initialDB libovsdbtest.TestSetup + nbClient libovsdbclient.Client + nbsbCleanup *libovsdbtest.Cleanup + fakeRouteClient *adminpolicybasedrouteclient.Clientset + fakeClient *fake.Clientset + mgr *externalPolicyManager + err error +) + +func initController(k8sObjects, routePolicyObjects []runtime.Object) { + stopChan = make(chan struct{}) + fakeClient = fake.NewSimpleClientset(k8sObjects...) + fakeRouteClient = adminpolicybasedrouteclient.NewSimpleClientset(routePolicyObjects...) 
+ iFactory, err = factory.NewMasterWatchFactory(&util.OVNMasterClientset{KubeClient: fakeClient}) + Expect(err).NotTo(HaveOccurred()) + iFactory.Start() + externalController, err = NewExternalMasterController(controllerName, fakeClient, + fakeRouteClient, + stopChan, + iFactory.PodCoreInformer(), + iFactory.NamespaceInformer(), + iFactory.NodeCoreInformer().Lister(), + nbClient, + addressset.NewFakeAddressSetFactory(controllerName)) + Expect(err).NotTo(HaveOccurred()) + mgr = externalController.mgr + go func() { + externalController.Run(5) + }() +} + +var _ = Describe("OVN External Gateway policy", func() { + + var ( + namespaceDefault = &corev1.Namespace{ + ObjectMeta: v1.ObjectMeta{Name: "default", + Labels: map[string]string{"name": "default"}}} + namespaceTest = &corev1.Namespace{ + ObjectMeta: v1.ObjectMeta{Name: "test", + Labels: map[string]string{"name": "test", "match": "test"}}, + } + namespaceTest2 = &corev1.Namespace{ + ObjectMeta: v1.ObjectMeta{Name: "test2", + Labels: map[string]string{"name": "test2", "match": "test"}}, + } + + dynamicPolicy = newPolicy( + "dynamic", + &v1.LabelSelector{MatchLabels: map[string]string{"name": "test"}}, + nil, + &v1.LabelSelector{MatchLabels: map[string]string{"name": "default"}}, + &v1.LabelSelector{MatchLabels: map[string]string{"key": "pod"}}, + false, + ) + + staticPolicy = newPolicy( + "static", + &v1.LabelSelector{MatchLabels: map[string]string{"name": "test"}}, + sets.New(staticHopGWIP), + nil, + nil, + false, + ) + + pod1 = newPod("pod_1", "default", "192.168.10.1", map[string]string{"key": "pod", "name": "pod1", "duplicated": "true"}) + pod2 = newPod("pod_2", "default", "192.168.20.1", map[string]string{"key": "pod", "name": "pod2"}) + pod3 = newPod("pod_3", "default", "192.168.30.1", map[string]string{"key": "pod", "name": "pod3"}) + pod4 = newPod("pod_4", "default", "192.168.40.1", map[string]string{"key": "pod", "name": "pod4"}) + pod5 = newPod("pod_5", "default", "192.168.50.1", map[string]string{"key": "pod", "name": "pod5"}) + pod6 = newPod("pod_6", "default", "192.168.60.1", map[string]string{"key": "pod", "name": "pod6"}) + ) + AfterEach(func() { + close(stopChan) + nbsbCleanup.Cleanup() + }) + + BeforeEach(func() { + initialDB = libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + }, + } + nbClient, _, nbsbCleanup, err = libovsdbtest.NewNBSBTestHarness(initialDB) + Expect(err).NotTo(HaveOccurred()) + stopChan = make(chan struct{}) + + }) + + var _ = Context("When adding new policies", func() { + + var ( + namespaceTest3 = &corev1.Namespace{ + ObjectMeta: v1.ObjectMeta{Name: "test3", + Labels: map[string]string{"name": "test3", "match": "test"}}, + } + multipleMatchPolicy = newPolicy( + "multiple", + &v1.LabelSelector{MatchLabels: map[string]string{"match": "test"}}, + sets.New("10.10.10.1"), + &v1.LabelSelector{MatchLabels: map[string]string{"name": "default"}}, + &v1.LabelSelector{MatchLabels: map[string]string{"key": "pod"}}, + false, + ) + ) + It("registers the new policy with multiple namespace matching", func() { + + initController([]runtime.Object{namespaceDefault, namespaceTest, namespaceTest2, namespaceTest3, pod1}, []runtime.Object{multipleMatchPolicy}) + + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1)) + Eventually(func(g Gomega) { + p, found, _ := externalController.mgr.getRoutePolicyFromCache(multipleMatchPolicy.Name) + g.Expect(found).To(BeTrue()) + 
g.Expect(p.Spec).To(BeEquivalentTo(multipleMatchPolicy.Spec)) + }, 5).Should(Succeed()) + Eventually(listNamespaceInfo(), 5).Should(HaveLen(3)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(multipleMatchPolicy.Name), + staticGateways: gatewayInfoList{{gws: sets.New(staticHopGWIP)}}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: "pod_1"}: { + gws: sets.New("192.168.10.1"), + }, + }})) + + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest2.Name) }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(multipleMatchPolicy.Name), + staticGateways: gatewayInfoList{{gws: sets.New(staticHopGWIP)}}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: "pod_1"}: { + gws: sets.New("192.168.10.1"), + }, + }})) + + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest3.Name) }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(multipleMatchPolicy.Name), + staticGateways: gatewayInfoList{{gws: sets.New(staticHopGWIP)}}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: "pod_1"}: { + gws: sets.New("192.168.10.1"), + }, + }})) + }) + + It("registers a new policy with no namespace match", func() { + initController([]runtime.Object{namespaceTest2, namespaceDefault, pod1}, []runtime.Object{dynamicPolicy}) + + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1)) + Eventually(listNamespaceInfo(), 5).Should(HaveLen(0)) + }) + + It("registers a new policy with multiple dynamic and static GWs and bfd enabled on all gateways", func() { + + staticMultiIPPolicy := newPolicy("multiIPPolicy", + &v1.LabelSelector{MatchLabels: map[string]string{"name": "test"}}, + sets.New("10.10.10.1", "10.10.10.2", "10.10.10.3", "10.10.10.3", "10.10.10.4"), + &v1.LabelSelector{MatchLabels: map[string]string{"name": "default"}}, + &v1.LabelSelector{MatchLabels: map[string]string{"key": "pod"}}, + true, + ) + initController([]runtime.Object{namespaceDefault, namespaceTest, pod1, pod2, pod3, pod4, pod5, pod6}, []runtime.Object{staticMultiIPPolicy}) + + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1)) + Eventually(listNamespaceInfo(), 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { + f := getNamespaceInfo(namespaceTest.Name) + sort.Sort(f.staticGateways) + return f + }, 5). 
+ Should(BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(staticMultiIPPolicy.Name), + staticGateways: gatewayInfoList{ + { + gws: sets.New("10.10.10.1"), + bfdEnabled: true, + }, + { + gws: sets.New("10.10.10.2"), + bfdEnabled: true, + }, + { + gws: sets.New("10.10.10.3"), + bfdEnabled: true, + }, + { + gws: sets.New("10.10.10.4"), + bfdEnabled: true, + }, + }, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + + {Namespace: "default", Name: pod3.Name}: { + gws: sets.New(pod3.Status.PodIPs[0].IP), + bfdEnabled: true, + }, + {Namespace: "default", Name: pod4.Name}: { + gws: sets.New(pod4.Status.PodIPs[0].IP), + bfdEnabled: true, + }, + {Namespace: "default", Name: pod5.Name}: { + gws: sets.New(pod5.Status.PodIPs[0].IP), + bfdEnabled: true, + }, + {Namespace: "default", Name: pod6.Name}: { + gws: sets.New(pod6.Status.PodIPs[0].IP), + bfdEnabled: true, + }, + {Namespace: "default", Name: pod1.Name}: { + gws: sets.New(pod1.Status.PodIPs[0].IP), + bfdEnabled: true, + }, + {Namespace: "default", Name: pod2.Name}: { + gws: sets.New(pod2.Status.PodIPs[0].IP), + bfdEnabled: true, + }, + }})) + + }) + + It("registers a second policy with no overlaping IPs", func() { + + initController([]runtime.Object{namespaceDefault, namespaceTest, pod1}, []runtime.Object{staticPolicy, dynamicPolicy}) + Eventually(func() []string { return listRoutePolicyInCache() }, 5). + Should(HaveLen(2)) + Eventually(listNamespaceInfo(), 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(staticPolicy.Name, dynamicPolicy.Name), + staticGateways: gatewayInfoList{ + { + gws: sets.New(staticHopGWIP), + }, + }, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + + {Namespace: "default", Name: "pod_1"}: { + gws: sets.New("192.168.10.1"), + }, + }})) + }) + It("registers policies with overlaping IPs for static and dynamic hops", func() { + duplicatedStatic := newPolicy("overlappingStatic", + &v1.LabelSelector{MatchLabels: map[string]string{"name": "test"}}, + sets.New(staticHopGWIP, "172.1.1.1"), + nil, + nil, + false) + duplicatedDynamic := newPolicy( + "duplicatedDynamic", + &v1.LabelSelector{MatchLabels: map[string]string{"name": "test"}}, + nil, + &v1.LabelSelector{MatchLabels: map[string]string{"name": "default"}}, + &v1.LabelSelector{MatchLabels: map[string]string{"duplicated": "true"}}, + false, + ) + initController([]runtime.Object{namespaceDefault, namespaceTest, pod1}, []runtime.Object{staticPolicy, duplicatedStatic, dynamicPolicy, duplicatedDynamic}) + + Eventually(func() []string { return listRoutePolicyInCache() }, 5). 
+ Should(HaveLen(4)) + Eventually(listNamespaceInfo(), 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { + f := getNamespaceInfo(namespaceTest.Name) + sort.Sort(f.staticGateways) + return f + }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(staticPolicy.Name, dynamicPolicy.Name, duplicatedStatic.Name, duplicatedDynamic.Name), + staticGateways: gatewayInfoList{ + { + gws: sets.New(staticHopGWIP), + }, + { + gws: sets.New("172.1.1.1"), + }, + }, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + + {Namespace: "default", Name: "pod_1"}: { + gws: sets.New("192.168.10.1"), + }, + }})) + }) + }) + + var _ = Context("when deleting a policy", func() { + + var ( + duplicatedStatic = newPolicy("duplicatedStatic", + &v1.LabelSelector{MatchLabels: map[string]string{"name": "test"}}, + sets.New(staticHopGWIP, "172.1.1.1"), + nil, + nil, + false) + duplicatedDynamic = newPolicy( + "duplicatedDynamic", + &v1.LabelSelector{MatchLabels: map[string]string{"name": "test"}}, + nil, + &v1.LabelSelector{MatchLabels: map[string]string{"name": "default"}}, + &v1.LabelSelector{MatchLabels: map[string]string{"duplicated": "true"}}, + false, + ) + ) + It("validates that the IPs of the policy are no longer reflected on the targeted namespaces when the policy is deleted an no other policy overlaps", func() { + initController([]runtime.Object{namespaceDefault, namespaceTest, pod1}, []runtime.Object{staticPolicy, dynamicPolicy}) + + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(2)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + deletePolicy(staticPolicy.Name, fakeRouteClient) + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + + {Namespace: "default", Name: "pod_1"}: { + gws: sets.New("192.168.10.1"), + }, + }})) + }) + It("validates that the IPs of a deleted policy won't show up in a non-matching namespace after the policy is deleted", func() { + initController([]runtime.Object{namespaceDefault, namespaceTest2, pod1}, []runtime.Object{staticPolicy, dynamicPolicy}) + + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(2)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(0)) + deletePolicy(dynamicPolicy.Name, fakeRouteClient) + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(0)) + }) + + It("validates that an overlapping IP from another policy will not be deleted when one of the overlaping policies is deleted", func() { + + initController([]runtime.Object{namespaceTest, namespaceDefault, pod1}, []runtime.Object{staticPolicy, duplicatedStatic, dynamicPolicy, duplicatedDynamic}) + + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(4)) + deletePolicy(staticPolicy.Name, fakeRouteClient) + deletePolicy(dynamicPolicy.Name, fakeRouteClient) + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(2)) + Eventually(func() *namespaceInfo { + f := getNamespaceInfo(namespaceTest.Name) + sort.Sort(f.staticGateways) + return f + }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + 
policies: sets.New(duplicatedStatic.Name, duplicatedDynamic.Name), + staticGateways: gatewayInfoList{ + { + gws: sets.New(staticHopGWIP), + }, + { + gws: sets.New("172.1.1.1"), + }, + }, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + + {Namespace: "default", Name: "pod_1"}: { + gws: sets.New("192.168.10.1"), + }, + }})) + }) + }) + + var _ = Context("when updating a policy", func() { + + It("validates that changing the from selector will retarget the new namespaces", func() { + initController([]runtime.Object{namespaceDefault, namespaceTest, namespaceTest2, pod1}, []runtime.Object{dynamicPolicy}) + + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + + {Namespace: "default", Name: "pod_1"}: { + gws: sets.New("192.168.10.1"), + }, + }})) + + p, err := fakeRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Get(context.TODO(), dynamicPolicy.Name, v1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + p.Spec.From.NamespaceSelector = v1.LabelSelector{MatchLabels: namespaceTest2.Labels} + p.Generation++ + _, err = fakeRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Update(context.Background(), p, v1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(BeNil()) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest2.Name) }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + + {Namespace: "default", Name: "pod_1"}: { + gws: sets.New("192.168.10.1"), + }, + }})) + }) + It("validates that changing a static hop from an existing policy will be applied to the target namespaces", func() { + newStaticIP := "10.30.20.1" + staticPolicy := newPolicy( + "static", + &v1.LabelSelector{MatchLabels: map[string]string{"name": "test"}}, + sets.New(staticHopGWIP, newStaticIP), + nil, + nil, + false, + ) + initController([]runtime.Object{namespaceDefault, namespaceTest}, []runtime.Object{staticPolicy}) + + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { + nsInfo := getNamespaceInfo(namespaceTest.Name) + sort.Sort(nsInfo.staticGateways) + return nsInfo + }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(staticPolicy.Name), + staticGateways: gatewayInfoList{ + { + gws: sets.New(staticHopGWIP), + }, + { + gws: sets.New(newStaticIP), + }, + }, + dynamicGateways: make(map[types.NamespacedName]*gatewayInfo, 0), + })) + + p, err := fakeRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Get(context.TODO(), staticPolicy.Name, v1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + p.Spec.NextHops.StaticHops = []*adminpolicybasedrouteapi.StaticHop{ + {IP: newStaticIP}, + } + p.Generation++ + _, err = fakeRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Update(context.Background(), p, v1.UpdateOptions{}) + 
Expect(err).NotTo(HaveOccurred())
+
+			Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5*time.Second).Should(
+				BeEquivalentTo(
+					&namespaceInfo{
+						policies: sets.New(staticPolicy.Name),
+						staticGateways: gatewayInfoList{
+							{
+								gws: sets.New(newStaticIP),
+							},
+						},
+						dynamicGateways: make(map[types.NamespacedName]*gatewayInfo, 0),
+					}))
+
+		})
+		It("validates that changes to a dynamic hop from an existing policy will be applied to the target namespaces", func() {
+			singlePodDynamicPolicy := newPolicy(
+				"singlePod",
+				&v1.LabelSelector{MatchLabels: map[string]string{"name": "test"}},
+				nil,
+				&v1.LabelSelector{MatchLabels: map[string]string{"name": "default"}},
+				&v1.LabelSelector{MatchLabels: map[string]string{"name": "pod1"}},
+				false,
+			)
+			initController([]runtime.Object{namespaceDefault, namespaceTest, pod1, pod2}, []runtime.Object{singlePodDynamicPolicy})
+
+			Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1))
+			Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1))
+			Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(
+				BeEquivalentTo(
+					&namespaceInfo{
+						policies:       sets.New(singlePodDynamicPolicy.Name),
+						staticGateways: gatewayInfoList{},
+						dynamicGateways: map[types.NamespacedName]*gatewayInfo{
+							{Namespace: "default", Name: "pod_1"}: {
+								gws: sets.New(pod1.Status.PodIPs[0].IP),
+							},
+						}}))
+
+			p, err := fakeRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Get(context.TODO(), singlePodDynamicPolicy.Name, v1.GetOptions{})
+			Expect(err).NotTo(HaveOccurred())
+			p.Spec.NextHops.DynamicHops[0].PodSelector = v1.LabelSelector{MatchLabels: map[string]string{"name": "pod2"}}
+			p.Generation++
+			_, err = fakeRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Update(context.Background(), p, v1.UpdateOptions{})
+			Expect(err).NotTo(HaveOccurred())
+			Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(
+				BeEquivalentTo(
+					&namespaceInfo{
+						policies:       sets.New(singlePodDynamicPolicy.Name),
+						staticGateways: gatewayInfoList{},
+						dynamicGateways: map[types.NamespacedName]*gatewayInfo{
+							{Namespace: "default", Name: "pod_2"}: {
+								gws: sets.New(pod2.Status.PodIPs[0].IP),
+							},
+						}}))
+		})
+		It("validates that removing one of the static hop IPs will be reflected in the route policy", func() {
+
+			staticMultiIPPolicy := newPolicy("multiIPPolicy",
+				&v1.LabelSelector{MatchLabels: map[string]string{"name": "test"}},
+				sets.New("10.10.10.1", "10.10.10.2", "10.10.10.3", "10.10.10.3", "10.10.10.4"),
+				nil, nil,
+				true,
+			)
+			initController([]runtime.Object{namespaceDefault, namespaceTest, pod1, pod2, pod3, pod4, pod5, pod6}, []runtime.Object{staticMultiIPPolicy})
+
+			Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1))
+			Eventually(listNamespaceInfo(), 5).Should(HaveLen(1))
+			Eventually(func() *namespaceInfo {
+				f := getNamespaceInfo(namespaceTest.Name)
+				sort.Sort(f.staticGateways)
+				return f
+			}, 5).
+ Should(BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(staticMultiIPPolicy.Name), + staticGateways: gatewayInfoList{ + { + gws: sets.New("10.10.10.1"), + bfdEnabled: true, + }, + { + gws: sets.New("10.10.10.2"), + bfdEnabled: true, + }, + { + gws: sets.New("10.10.10.3"), + bfdEnabled: true, + }, + { + gws: sets.New("10.10.10.4"), + bfdEnabled: true, + }, + }, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{}, + })) + p, err := fakeRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Get(context.TODO(), staticMultiIPPolicy.Name, v1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + p.Spec.NextHops.StaticHops = []*adminpolicybasedrouteapi.StaticHop{ + { + IP: "10.10.10.1", + BFDEnabled: true, + }, + { + IP: "10.10.10.2", + BFDEnabled: true, + }, + { + IP: "10.10.10.3", + BFDEnabled: true, + }, + } + p.Generation++ + _, err = fakeRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Update(context.Background(), p, v1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + By("Validating the static refernces don't contain the last element") + Eventually(func() *namespaceInfo { + f := getNamespaceInfo(namespaceTest.Name) + sort.Sort(f.staticGateways) + return f + }, 5). + Should(BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(staticMultiIPPolicy.Name), + staticGateways: gatewayInfoList{ + { + gws: sets.New("10.10.10.1"), + bfdEnabled: true, + }, + { + gws: sets.New("10.10.10.2"), + bfdEnabled: true, + }, + { + gws: sets.New("10.10.10.3"), + bfdEnabled: true, + }, + }, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{}})) + }) + It("validates that removing a duplicated static hop IP from an overlapping policy static hop will keep the static IP in the route policy", func() { + + staticMultiIPPolicy := newPolicy("multiIPPolicy", + &v1.LabelSelector{MatchLabels: map[string]string{"name": "test"}}, + sets.New("20.10.10.1", "20.10.10.2", "20.10.10.3", "20.10.10.4", staticHopGWIP), + nil, nil, + false, + ) + initController([]runtime.Object{namespaceDefault, namespaceTest, pod1, pod2, pod3, pod4, pod5, pod6}, []runtime.Object{staticMultiIPPolicy, staticPolicy}) + + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(2)) + Eventually(listNamespaceInfo(), 5).Should(HaveLen(1)) + Eventually(func() gatewayInfoList { + f := getNamespaceInfo(namespaceTest.Name) + sort.Sort(f.staticGateways) + return f.staticGateways + }, 5). + Should(BeEquivalentTo( + gatewayInfoList{ + { + gws: sets.New(staticHopGWIP), + }, + { + gws: sets.New("20.10.10.1"), + }, + { + gws: sets.New("20.10.10.2"), + }, + { + gws: sets.New("20.10.10.3"), + }, + { + gws: sets.New("20.10.10.4"), + }, + })) + Eventually(getNamespaceInfo(namespaceTest.Name).policies).Should(BeEquivalentTo(sets.New(staticMultiIPPolicy.Name, staticPolicy.Name))) + p, err := fakeRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Get(context.TODO(), staticMultiIPPolicy.Name, v1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + p.Spec.NextHops.StaticHops = []*adminpolicybasedrouteapi.StaticHop{ + { + IP: "20.10.10.2", + }, + { + IP: "20.10.10.3", + }, + { + IP: "20.10.10.4", + }, + { + IP: "20.10.20.1", + }, + } + p.Generation++ + _, err = fakeRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Update(context.Background(), p, v1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + By("Validating the static refernces don't contain the last element") + Eventually(func() gatewayInfoList { + f := getNamespaceInfo(namespaceTest.Name) + sort.Sort(f.staticGateways) + return f.staticGateways + }, 5). 
+ Should(BeEquivalentTo( + gatewayInfoList{ + { + gws: sets.New(staticHopGWIP), + }, + { + gws: sets.New("20.10.10.1"), + }, + { + gws: sets.New("20.10.10.2"), + }, + { + gws: sets.New("20.10.10.3"), + }, + { + gws: sets.New("20.10.10.4"), + }, + })) + Eventually(getNamespaceInfo(namespaceTest.Name).policies).Should(BeEquivalentTo(sets.New(staticMultiIPPolicy.Name, staticPolicy.Name))) + }) + }) +}) diff --git a/go-controller/pkg/ovn/controller/apbroute/master_controller.go b/go-controller/pkg/ovn/controller/apbroute/master_controller.go new file mode 100644 index 0000000000..0148483589 --- /dev/null +++ b/go-controller/pkg/ovn/controller/apbroute/master_controller.go @@ -0,0 +1,568 @@ +package apbroute + +import ( + "context" + "fmt" + "reflect" + "strings" + "sync" + "time" + + nettypes "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + libovsdbclient "github.com/ovn-org/libovsdb/client" + v1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + ktypes "k8s.io/apimachinery/pkg/types" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/apimachinery/pkg/util/wait" + coreinformers "k8s.io/client-go/informers/core/v1" + "k8s.io/client-go/kubernetes" + corev1listers "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/util/workqueue" + "k8s.io/klog/v2" + + adminpolicybasedrouteapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + adminpolicybasedrouteclient "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned" + adminpolicybasedrouteinformer "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions" + adminpolicybasedroutelisters "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/listers/adminpolicybasedroute/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" + addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" +) + +const ( + resyncInterval = 0 + maxRetries = 15 +) + +var ( + controllerName string +) + +// Admin Policy Based Route services + +type ExternalGatewayMasterController struct { + client kubernetes.Interface + apbRoutePolicyClient adminpolicybasedrouteclient.Interface + stopCh <-chan struct{} + + // route policies + + // routerInformer v1apbinformer.AdminPolicyBasedExternalRouteInformer + routeLister adminpolicybasedroutelisters.AdminPolicyBasedExternalRouteLister + routeSynced cache.InformerSynced + routeQueue workqueue.RateLimitingInterface + + // Pods + podLister corev1listers.PodLister + podSynced cache.InformerSynced + podQueue workqueue.RateLimitingInterface + + // Namespaces + namespaceQueue workqueue.RateLimitingInterface + namespaceLister corev1listers.NamespaceLister + namespaceSynced cache.InformerSynced + + // External gateway caches + // Make them public so that they can be used by the annotation logic to lock on namespaces and share the same external route information + ExternalGWCache map[ktypes.NamespacedName]*ExternalRouteInfo + ExGWCacheMutex *sync.RWMutex + + routePolicyInformer adminpolicybasedrouteinformer.SharedInformerFactory + + mgr *externalPolicyManager + nbClient *northBoundClient +} + +func NewExternalMasterController( + parentControllerName string, + client kubernetes.Interface, + apbRoutePolicyClient 
adminpolicybasedrouteclient.Interface, + stopCh <-chan struct{}, + podInformer coreinformers.PodInformer, + namespaceInformer coreinformers.NamespaceInformer, + nodeLister corev1listers.NodeLister, + nbClient libovsdbclient.Client, + addressSetFactory addressset.AddressSetFactory, +) (*ExternalGatewayMasterController, error) { + + controllerName = parentControllerName + routePolicyInformer := adminpolicybasedrouteinformer.NewSharedInformerFactory(apbRoutePolicyClient, resyncInterval) + externalRouteInformer := routePolicyInformer.K8s().V1().AdminPolicyBasedExternalRoutes() + externalGWCache := make(map[ktypes.NamespacedName]*ExternalRouteInfo) + exGWCacheMutex := &sync.RWMutex{} + nbCli := &northBoundClient{ + routeLister: externalRouteInformer.Lister(), + nodeLister: nodeLister, + nbClient: nbClient, + addressSetFactory: addressSetFactory, + externalGWCache: externalGWCache, + exGWCacheMutex: exGWCacheMutex, + } + + c := &ExternalGatewayMasterController{ + client: client, + apbRoutePolicyClient: apbRoutePolicyClient, + stopCh: stopCh, + routeLister: externalRouteInformer.Lister(), + routeSynced: externalRouteInformer.Informer().HasSynced, + routeQueue: workqueue.NewNamedRateLimitingQueue( + workqueue.NewItemFastSlowRateLimiter(time.Second, 5*time.Second, 5), + "adminpolicybasedexternalroutes", + ), + podLister: podInformer.Lister(), + podSynced: podInformer.Informer().HasSynced, + podQueue: workqueue.NewNamedRateLimitingQueue( + workqueue.NewItemFastSlowRateLimiter(time.Second, 5*time.Second, 5), + "apbexternalroutepods", + ), + namespaceLister: namespaceInformer.Lister(), + namespaceSynced: namespaceInformer.Informer().HasSynced, + namespaceQueue: workqueue.NewNamedRateLimitingQueue( + workqueue.NewItemFastSlowRateLimiter(time.Second, 5*time.Second, 5), + "apbexternalroutenamespaces", + ), + ExternalGWCache: externalGWCache, + ExGWCacheMutex: exGWCacheMutex, + routePolicyInformer: routePolicyInformer, + nbClient: nbCli, + mgr: newExternalPolicyManager( + stopCh, + podInformer.Lister(), + namespaceInformer.Lister(), + routePolicyInformer.K8s().V1().AdminPolicyBasedExternalRoutes().Lister(), + nbCli), + } + + _, err := namespaceInformer.Informer().AddEventHandler( + factory.WithUpdateHandlingForObjReplace(cache.ResourceEventHandlerFuncs{ + AddFunc: c.onNamespaceAdd, + UpdateFunc: c.onNamespaceUpdate, + DeleteFunc: c.onNamespaceDelete, + })) + if err != nil { + return nil, err + } + + _, err = podInformer.Informer().AddEventHandler( + factory.WithUpdateHandlingForObjReplace(cache.ResourceEventHandlerFuncs{ + AddFunc: c.onPodAdd, + UpdateFunc: c.onPodUpdate, + DeleteFunc: c.onPodDelete, + })) + if err != nil { + return nil, err + } + _, err = externalRouteInformer.Informer().AddEventHandler( + factory.WithUpdateHandlingForObjReplace(cache.ResourceEventHandlerFuncs{ + AddFunc: c.onPolicyAdd, + UpdateFunc: c.onPolicyUpdate, + DeleteFunc: c.onPolicyDelete, + })) + if err != nil { + return nil, err + } + + return c, nil + +} + +func (c *ExternalGatewayMasterController) Run(threadiness int) { + defer utilruntime.HandleCrash() + klog.Infof("Starting Admin Policy Based Route Controller") + + c.routePolicyInformer.Start(c.stopCh) + + if !cache.WaitForNamedCacheSync("apbexternalroutenamespaces", c.stopCh, c.namespaceSynced) { + utilruntime.HandleError(fmt.Errorf("timed out waiting for caches to sync")) + klog.Infof("Synchronization failed") + return + } + + if !cache.WaitForNamedCacheSync("apbexternalroutepods", c.stopCh, c.podSynced) { + utilruntime.HandleError(fmt.Errorf("timed out waiting for 
caches to sync")) + klog.Infof("Synchronization failed") + return + } + + if !cache.WaitForNamedCacheSync("adminpolicybasedexternalroutes", c.stopCh, c.routeSynced) { + utilruntime.HandleError(fmt.Errorf("timed out waiting for caches to sync")) + klog.Infof("Synchronization failed") + return + } + + klog.Infof("Repairing Admin Policy Based External Route Services") + c.repair() + + wg := &sync.WaitGroup{} + for i := 0; i < threadiness; i++ { + wg.Add(1) + go func() { + defer wg.Done() + wait.Until(func() { + // processes route policies + c.runPolicyWorker(wg) + }, time.Second, c.stopCh) + }() + } + + for i := 0; i < threadiness; i++ { + wg.Add(1) + go func() { + defer wg.Done() + wait.Until(func() { + // detects gateway pod changes and updates the pod's IP and MAC in the northbound DB + c.runPodWorker(wg) + }, time.Second, c.stopCh) + }() + } + + for i := 0; i < threadiness; i++ { + wg.Add(1) + go func() { + defer wg.Done() + wait.Until(func() { + // detects namespace changes and applies polices that match the namespace selector in the `From` policy field + c.runNamespaceWorker(wg) + }, time.Second, c.stopCh) + }() + } + + // wait until we're told to stop + <-c.stopCh + + c.podQueue.ShutDown() + c.routeQueue.ShutDown() + c.namespaceQueue.ShutDown() + + wg.Wait() + +} + +func (c *ExternalGatewayMasterController) runPolicyWorker(wg *sync.WaitGroup) { + for c.processNextPolicyWorkItem(wg) { + } +} + +func (c *ExternalGatewayMasterController) processNextPolicyWorkItem(wg *sync.WaitGroup) bool { + wg.Add(1) + defer wg.Done() + + obj, shutdown := c.routeQueue.Get() + + if shutdown { + return false + } + + defer c.routeQueue.Done(obj) + + item := obj.(*adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) + klog.Infof("Processing policy %s", item.Name) + err := c.syncRoutePolicy(item) + if err != nil { + if c.routeQueue.NumRequeues(item) < maxRetries { + klog.V(2).InfoS("Error found while processing policy: %w", err) + c.routeQueue.AddRateLimited(item) + return true + } + klog.Warningf("Dropping policy %q out of the queue: %w", item.Name, err) + utilruntime.HandleError(err) + } + c.routeQueue.Forget(obj) + return true +} + +func (c *ExternalGatewayMasterController) syncRoutePolicy(routePolicy *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) error { + _, err := c.routeLister.Get(routePolicy.Name) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + if apierrors.IsNotFound(err) { + // DELETE use case + klog.Infof("Deleting policy %s", routePolicy.Name) + err = c.mgr.processDeletePolicy(routePolicy.Name) + if err != nil { + return fmt.Errorf("failed to delete Admin Policy Based External Route %s:%w", routePolicy.Name, err) + } + klog.Infof("Policy %s deleted", routePolicy.Name) + return nil + } + currentPolicy, found, markedForDeletion := c.mgr.getRoutePolicyFromCache(routePolicy.Name) + if markedForDeletion { + klog.Warningf("Attempting to add or update route policy %s when it has been marked for deletion. 
Skipping...", routePolicy.Name) + return nil + } + if !found { + // ADD use case + klog.Infof("Adding policy %s", routePolicy.Name) + pp, err := c.mgr.processAddPolicy(routePolicy) + newErr := c.updateStatusAPBExternalRoute(routePolicy.Name, pp, err) + if err != nil { + return fmt.Errorf("failed to create Admin Policy Based External Route %s:%w", routePolicy.Name, err) + } + if newErr != nil { + return fmt.Errorf("failed to update status in Admin Policy Based External Route %s:%w", routePolicy.Name, newErr) + } + return nil + } + // UPDATE use case + klog.Infof("Updating policy %s", routePolicy.Name) + pp, err := c.mgr.processUpdatePolicy(¤tPolicy, routePolicy) + newErr := c.updateStatusAPBExternalRoute(routePolicy.Name, pp, err) + if err != nil { + return fmt.Errorf("failed to update Admin Policy Based External Route %s:%w", routePolicy.Name, err) + } + if newErr != nil { + return fmt.Errorf("failed to update status in Admin Policy Based External Route %s:%w", routePolicy.Name, newErr) + } + return nil +} + +func (c *ExternalGatewayMasterController) onPolicyAdd(obj interface{}) { + c.routeQueue.Add(obj) +} + +func (c *ExternalGatewayMasterController) onPolicyUpdate(oldObj, newObj interface{}) { + oldRoutePolicy := oldObj.(*adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) + newRoutePolicy := newObj.(*adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) + + if oldRoutePolicy.Generation == newRoutePolicy.Generation || + !newRoutePolicy.GetDeletionTimestamp().IsZero() { + return + } + + c.routeQueue.Add(newObj) +} + +func (c *ExternalGatewayMasterController) onPolicyDelete(obj interface{}) { + c.routeQueue.Add(obj) +} + +func (c *ExternalGatewayMasterController) onNamespaceAdd(obj interface{}) { + c.namespaceQueue.Add(obj) +} + +func (c *ExternalGatewayMasterController) onNamespaceUpdate(oldObj, newObj interface{}) { + oldNamespace := oldObj.(*v1.Namespace) + newNamespace := newObj.(*v1.Namespace) + + if oldNamespace.ResourceVersion == newNamespace.ResourceVersion || !newNamespace.GetDeletionTimestamp().IsZero() { + return + } + c.namespaceQueue.Add(newObj) +} + +func (c *ExternalGatewayMasterController) onNamespaceDelete(obj interface{}) { + c.namespaceQueue.Add(obj) +} + +func (c *ExternalGatewayMasterController) runNamespaceWorker(wg *sync.WaitGroup) { + for c.processNextNamespaceWorkItem(wg) { + + } +} + +func (c *ExternalGatewayMasterController) processNextNamespaceWorkItem(wg *sync.WaitGroup) bool { + wg.Add(1) + defer wg.Done() + + obj, shutdown := c.namespaceQueue.Get() + + if shutdown { + return false + } + + defer c.namespaceQueue.Done(obj) + + err := c.syncNamespace(obj.(*v1.Namespace)) + if err != nil { + if c.namespaceQueue.NumRequeues(obj) < maxRetries { + klog.V(2).InfoS("Error found while processing namespace %s:%w", obj.(*v1.Namespace), err) + c.namespaceQueue.AddRateLimited(obj) + return true + } + klog.Warningf("Dropping namespace %q out of the queue: %v", obj.(*v1.Namespace).Name, err) + utilruntime.HandleError(err) + } + c.namespaceQueue.Forget(obj) + return true +} + +func (c *ExternalGatewayMasterController) syncNamespace(namespace *v1.Namespace) error { + _, err := c.namespaceLister.Get(namespace.Name) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + if apierrors.IsNotFound(err) { + // DELETE use case + klog.Infof("Deleting namespace reference %s", namespace.Name) + _, found := c.mgr.getNamespaceInfoFromCache(namespace.Name) + if !found { + // namespace is not a recipient for policies + return nil + } + 
c.mgr.deleteNamespaceInfoInCache(namespace.Name) + c.mgr.unlockNamespaceInfoCache(namespace.Name) + return nil + } + matches, err := c.mgr.getPoliciesForNamespace(namespace.Name) + if err != nil { + return err + } + cacheInfo, found := c.mgr.getNamespaceInfoFromCache(namespace.Name) + if !found && len(matches) == 0 { + // it's not a namespace being cached already and it is not a target for policies, nothing to do + return nil + } + if !found { + // ADD use case + // new namespace or namespace updated its labels and now match a routing policy + defer c.mgr.unlockNamespaceInfoCache(namespace.Name) + cacheInfo = c.mgr.newNamespaceInfoInCache(namespace.Name) + cacheInfo.policies = matches + return c.mgr.processAddNamespace(namespace, cacheInfo) + } + + if !cacheInfo.policies.Equal(matches) { + // UPDATE use case + // policies differ, need to reconcile them + defer c.mgr.unlockNamespaceInfoCache(namespace.Name) + err = c.mgr.processUpdateNamespace(namespace.Name, cacheInfo.policies, matches, cacheInfo) + if err != nil { + return err + } + if cacheInfo.policies.Len() == 0 { + c.mgr.deleteNamespaceInfoInCache(namespace.Name) + } + return nil + } + c.mgr.unlockNamespaceInfoCache(namespace.Name) + return nil + +} + +func (c *ExternalGatewayMasterController) onPodAdd(obj interface{}) { + c.podQueue.Add(obj) +} + +func (c *ExternalGatewayMasterController) onPodUpdate(oldObj, newObj interface{}) { + o := oldObj.(*v1.Pod) + n := newObj.(*v1.Pod) + // if labels AND assigned Pod IPs AND networkStatus annotations are the same, skip processing changes to the pod. + if reflect.DeepEqual(o.Labels, n.Labels) && + reflect.DeepEqual(o.Status.PodIPs, n.Status.PodIPs) && + reflect.DeepEqual(o.Annotations[nettypes.NetworkStatusAnnot], n.Annotations[nettypes.NetworkStatusAnnot]) { + return + } + c.podQueue.Add(newObj) +} + +func (c *ExternalGatewayMasterController) onPodDelete(obj interface{}) { + c.podQueue.Add(obj) +} + +func (c *ExternalGatewayMasterController) runPodWorker(wg *sync.WaitGroup) { + for c.processNextPodWorkItem(wg) { + } +} + +func (c *ExternalGatewayMasterController) processNextPodWorkItem(wg *sync.WaitGroup) bool { + wg.Add(1) + defer wg.Done() + + obj, shutdown := c.podQueue.Get() + + if shutdown { + return false + } + + defer c.podQueue.Done(obj) + + p := obj.(*v1.Pod) + err := c.syncPod(p) + if err != nil { + if c.podQueue.NumRequeues(obj) < maxRetries { + klog.V(2).InfoS("Error found while processing pod %s/%s:%w", p.Namespace, p.Name, err) + c.podQueue.AddRateLimited(obj) + return true + } + klog.Warningf("Dropping pod %s/%s out of the queue: %s", p.Namespace, p.Name, err) + utilruntime.HandleError(err) + } + + c.podQueue.Forget(obj) + return true +} + +func (c *ExternalGatewayMasterController) syncPod(pod *v1.Pod) error { + + _, err := c.podLister.Pods(pod.Namespace).Get(pod.Name) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + namespaces := c.mgr.filterNamespacesUsingPodGateway(ktypes.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}) + klog.Infof("Processing pod %s/%s", pod.Namespace, pod.Name) + if apierrors.IsNotFound(err) || !pod.DeletionTimestamp.IsZero() { + // DELETE case + if namespaces.Len() == 0 { + // nothing to do, this pod is not a gateway pod + return nil + } + klog.Infof("Deleting pod gateway %s/%s", pod.Namespace, pod.Name) + return c.mgr.processDeletePod(pod, namespaces) + } + if namespaces.Len() == 0 { + // ADD case: new pod or existing pod that is not a gateway pod and could now be one. 
+ klog.Infof("Adding pod %s/%s", pod.Namespace, pod.Name) + return c.mgr.processAddPod(pod) + } + // UPDATE case + klog.Infof("Updating pod gateway %s/%s", pod.Namespace, pod.Name) + return c.mgr.processUpdatePod(pod, namespaces) +} + +func (c *ExternalGatewayMasterController) updateStatusAPBExternalRoute(routeName string, processedPolicy *routePolicy, processedError error) error { + + routePolicy, err := c.apbRoutePolicyClient.K8sV1().AdminPolicyBasedExternalRoutes().Get(context.TODO(), routeName, metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + return err + } + + gwIPs := sets.New[string]() + if processedError == nil { + for _, static := range processedPolicy.staticGateways { + gwIPs = gwIPs.Union(static.gws) + } + for _, dynamic := range processedPolicy.dynamicGateways { + gwIPs = gwIPs.Union(dynamic.gws) + } + } + updateStatus(routePolicy, strings.Join(sets.List(gwIPs), ","), processedError) + _, err = c.apbRoutePolicyClient.K8sV1().AdminPolicyBasedExternalRoutes().UpdateStatus(context.TODO(), routePolicy, metav1.UpdateOptions{}) + if !apierrors.IsNotFound(err) { + return err + } + return nil +} + +func (c *ExternalGatewayMasterController) GetDynamicGatewayIPsForTargetNamespace(namespaceName string) (sets.Set[string], error) { + return c.mgr.getDynamicGatewayIPsForTargetNamespace(namespaceName) +} + +func (c *ExternalGatewayMasterController) GetStaticGatewayIPsForTargetNamespace(namespaceName string) (sets.Set[string], error) { + return c.mgr.getStaticGatewayIPsForTargetNamespace(namespaceName) +} + +func updateStatus(route *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute, gwIPs string, err error) { + if err != nil { + route.Status.Status = adminpolicybasedrouteapi.FailStatus + route.Status.Messages = append(route.Status.Messages, "Failed to apply policy:%w", err.Error()) + return + } + route.Status.LastTransitionTime = metav1.Time{Time: time.Now()} + route.Status.Status = adminpolicybasedrouteapi.SuccessStatus + route.Status.Messages = append(route.Status.Messages, fmt.Sprintf("Configured external gateway IPs: %s", gwIPs)) + klog.Infof("Updating Admin Policy Based External Route %s with Status: %s, Message: %s", route.Name, route.Status.Status, route.Status.Messages[len(route.Status.Messages)-1]) +} diff --git a/go-controller/pkg/ovn/controller/apbroute/network_client.go b/go-controller/pkg/ovn/controller/apbroute/network_client.go new file mode 100644 index 0000000000..f1e1df3769 --- /dev/null +++ b/go-controller/pkg/ovn/controller/apbroute/network_client.go @@ -0,0 +1,744 @@ +package apbroute + +import ( + "fmt" + "net" + "regexp" + "strings" + "sync" + + "github.com/pkg/errors" + "github.com/vishvananda/netlink" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/labels" + ktypes "k8s.io/apimachinery/pkg/types" + kerrors "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/apimachinery/pkg/util/sets" + corev1listers "k8s.io/client-go/listers/core/v1" + "k8s.io/klog/v2" + utilnet "k8s.io/utils/net" + + libovsdbclient "github.com/ovn-org/libovsdb/client" + "github.com/ovn-org/libovsdb/ovsdb" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + adminpolicybasedroutelisters "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/listers/adminpolicybasedroute/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdbops" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" + addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" + 
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" +) + +type networkClient interface { + deleteGatewayIPs(namespaceName string, toBeDeletedGWIPs, toBeKept sets.Set[string]) error + addGatewayIPs(pod *v1.Pod, egress gatewayInfoList) error +} + +type northBoundClient struct { + routeLister adminpolicybasedroutelisters.AdminPolicyBasedExternalRouteLister + nodeLister corev1listers.NodeLister + // NorthBound client interface + nbClient libovsdbclient.Client + + // An address set factory that creates address sets + addressSetFactory addressset.AddressSetFactory + externalGWCache map[ktypes.NamespacedName]*ExternalRouteInfo + exGWCacheMutex *sync.RWMutex +} + +type conntrackClient struct { + podLister corev1listers.PodLister +} + +func (nb *northBoundClient) findLogicalRouterStaticRoutesWithPredicate(p func(item *nbdb.LogicalRouterStaticRoute) bool) ([]*nbdb.LogicalRouterStaticRoute, error) { + return libovsdbops.FindLogicalRouterStaticRoutesWithPredicate(nb.nbClient, p) +} +func (nb *northBoundClient) deleteLogicalRouterStaticRoutes(routerName string, lrsrs ...*nbdb.LogicalRouterStaticRoute) error { + return libovsdbops.DeleteLogicalRouterStaticRoutes(nb.nbClient, routerName, lrsrs...) +} + +func (nb *northBoundClient) findLogicalRoutersWithPredicate(p func(item *nbdb.LogicalRouter) bool) ([]*nbdb.LogicalRouter, error) { + return libovsdbops.FindLogicalRoutersWithPredicate(nb.nbClient, p) +} + +// delAllHybridRoutePolicies deletes all the 501 hybrid-route-policies that +// force pod egress traffic to be rerouted to a gateway router for local gateway mode. +// Called when migrating to SGW from LGW. +func (nb *northBoundClient) delAllHybridRoutePolicies() error { + // nuke all the policies + policyPred := func(item *nbdb.LogicalRouterPolicy) bool { + return item.Priority == types.HybridOverlayReroutePriority + } + err := libovsdbops.DeleteLogicalRouterPoliciesWithPredicate(nb.nbClient, types.OVNClusterRouter, policyPred) + if err != nil { + return fmt.Errorf("error deleting hybrid route policies on %s: %v", types.OVNClusterRouter, err) + } + + // nuke all the address-sets. + // if we fail to remove LRP's above, we don't attempt to remove ASes due to dependency constraints. + predicateIDs := libovsdbops.NewDbObjectIDs(libovsdbops.AddressSetHybridNodeRoute, controllerName, nil) + asPred := libovsdbops.GetPredicate[*nbdb.AddressSet](predicateIDs, nil) + err = libovsdbops.DeleteAddressSetsWithPredicate(nb.nbClient, asPred) + if err != nil { + return fmt.Errorf("failed to remove hybrid route address sets: %v", err) + } + + return nil +} + +// delAllLegacyHybridRoutePolicies deletes all the 501 hybrid-route-policies that +// force pod egress traffic to be rerouted to a gateway router for local gateway mode. +// New hybrid route matches on address set, while legacy matches just on pod IP +func (nb *northBoundClient) delAllLegacyHybridRoutePolicies() error { + // nuke all the policies + p := func(item *nbdb.LogicalRouterPolicy) bool { + if item.Priority != types.HybridOverlayReroutePriority { + return false + } + if isNewVer, err := regexp.MatchString(`src\s*==\s*\$`, item.Match); err == nil && isNewVer { + return false + } + return true + } + err := libovsdbops.DeleteLogicalRouterPoliciesWithPredicate(nb.nbClient, types.OVNClusterRouter, p) + if err != nil { + return fmt.Errorf("error deleting legacy hybrid route policies on %s: %v", types.OVNClusterRouter, err) + } + return nil +} + +// deleteGatewayIPs handles deleting static routes for pods on a specific GR. 
+// If a set of gateways is given, only routes for that gateway are deleted. If no gateways +// are given, all routes for the namespace are deleted. +func (nb *northBoundClient) deleteGatewayIPs(namespace string, toBeDeletedGWIPs, _ sets.Set[string]) error { + for _, routeInfo := range nb.getRouteInfosForNamespace(namespace) { + routeInfo.Lock() + if routeInfo.Deleted { + routeInfo.Unlock() + continue + } + for podIP, routes := range routeInfo.PodExternalRoutes { + for gw, gr := range routes { + if toBeDeletedGWIPs.Has(gw) { + // we cannot delete an external gateway IP from the north bound if it's also being provided by an external gateway annotation or if it is also + // defined by a coexisting policy in the same namespace + if err := nb.deletePodGWRoute(routeInfo, podIP, gw, gr); err != nil { + // if we encounter error while deleting routes for one pod; we return and don't try subsequent pods + routeInfo.Unlock() + return fmt.Errorf("delete pod GW route failed: %w", err) + } + delete(routes, gw) + } + } + } + routeInfo.Unlock() + } + return nil +} + +// getRouteInfosForNamespace returns all routeInfos for a specific namespace +func (nb *northBoundClient) getRouteInfosForNamespace(namespace string) []*ExternalRouteInfo { + nb.exGWCacheMutex.RLock() + defer nb.exGWCacheMutex.RUnlock() + + routes := make([]*ExternalRouteInfo, 0) + for namespacedName, routeInfo := range nb.externalGWCache { + if namespacedName.Namespace == namespace { + routes = append(routes, routeInfo) + } + } + + return routes +} + +func (nb *northBoundClient) addGatewayIPs(pod *v1.Pod, egress gatewayInfoList) error { + if util.PodCompleted(pod) || util.PodWantsHostNetwork(pod) { + return nil + } + podIPs := make([]*net.IPNet, 0) + for _, podIP := range pod.Status.PodIPs { + podIPStr := utilnet.ParseIPSloppy(podIP.IP).String() + cidr := podIPStr + util.GetIPFullMask(podIPStr) + _, ipNet, err := net.ParseCIDR(cidr) + if err != nil { + return fmt.Errorf("failed to parse CIDR: %s, error: %v", cidr, err) + } + podIPs = append(podIPs, ipNet) + } + if len(podIPs) == 0 { + klog.Warningf("Will not add gateway routes pod %s/%s. IPs not found!", pod.Namespace, pod.Name) + return nil + } + if config.Gateway.DisableSNATMultipleGWs { + // delete all perPodSNATs (if this pod was controlled by egressIP controller, it will stop working since + // a pod cannot be used for multiple-external-gateways and egressIPs at the same time) + if err := nb.deletePodSNAT(pod.Spec.NodeName, []*net.IPNet{}, podIPs); err != nil { + klog.Error(err.Error()) + } + } + podNsName := ktypes.NamespacedName{Namespace: pod.Namespace, Name: pod.Name} + return nb.addGWRoutesForPod(egress, podIPs, podNsName, pod.Spec.NodeName) +} + +// deletePodSNAT removes per pod SNAT rules towards the nodeIP that are applied to the GR where the pod resides +// if allSNATs flag is set, then all the SNATs (including against egressIPs if any) for that pod will be deleted +// used when disableSNATMultipleGWs=true +func (nb *northBoundClient) deletePodSNAT(nodeName string, extIPs, podIPNets []*net.IPNet) error { + nats, err := buildPodSNAT(extIPs, podIPNets) + if err != nil { + return err + } + logicalRouter := nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + nodeName, + } + err = libovsdbops.DeleteNATs(nb.nbClient, &logicalRouter, nats...) 
+ if err != nil { + return fmt.Errorf("failed to delete SNAT rule for pod on gateway router %s: %v", logicalRouter.Name, err) + } + return nil +} + +// addEgressGwRoutesForPod handles adding all routes to gateways for a pod on a specific GR +func (nb *northBoundClient) addGWRoutesForPod(gateways []*gatewayInfo, podIfAddrs []*net.IPNet, podNsName ktypes.NamespacedName, node string) error { + gr := util.GetGatewayRouterFromNode(node) + + routesAdded := 0 + portPrefix, err := nb.extSwitchPrefix(node) + if err != nil { + klog.Infof("Failed to find ext switch prefix for %s %v", node, err) + return err + } + + port := portPrefix + types.GWRouterToExtSwitchPrefix + gr + routeInfo, err := nb.ensureRouteInfoLocked(podNsName) + if err != nil { + return fmt.Errorf("failed to ensure routeInfo for %s, error: %v", podNsName, err) + } + defer routeInfo.Unlock() + for _, podIPNet := range podIfAddrs { + for _, gateway := range gateways { + // TODO (trozet): use the go bindings here and batch commands + // validate the ip and gateway belong to the same address family + gws, err := util.MatchAllIPStringFamily(utilnet.IsIPv6(podIPNet.IP), gateway.gws.UnsortedList()) + if err != nil { + klog.Warningf("Address families for the pod address %s and gateway %s did not match", podIPNet.IP.String(), gateway.gws) + continue + } + podIP := podIPNet.IP.String() + for _, gw := range gws { + // if route was already programmed, skip it + if foundGR, ok := routeInfo.PodExternalRoutes[podIP][gw]; ok && foundGR == gr { + routesAdded++ + continue + } + mask := util.GetIPFullMask(podIP) + if err := nb.createOrUpdateBFDStaticRoute(gateway.bfdEnabled, gw, podIP, gr, port, mask); err != nil { + return err + } + if routeInfo.PodExternalRoutes[podIP] == nil { + routeInfo.PodExternalRoutes[podIP] = make(map[string]string) + } + routeInfo.PodExternalRoutes[podIP][gw] = gr + routesAdded++ + if len(routeInfo.PodExternalRoutes[podIP]) == 1 { + if err := nb.addHybridRoutePolicyForPod(podIPNet.IP, node); err != nil { + return err + } + } + } + } + } + // if no routes are added return an error + if routesAdded < 1 { + return fmt.Errorf("gateway specified for namespace %s with gateway addresses %v but no valid routes exist for pod: %s", + podNsName.Namespace, podIfAddrs, podNsName.Name) + } + return nil +} + +// AddHybridRoutePolicyForPod handles adding a higher priority allow policy to allow traffic to be routed normally +// by ecmp routes +func (nb *northBoundClient) addHybridRoutePolicyForPod(podIP net.IP, node string) error { + if config.Gateway.Mode == config.GatewayModeLocal { + // Add podIP to the node's address_set. 
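+ // Illustrative sketch (hypothetical values, not part of this patch): for an
+ // IPv4 pod on node "node1" with cluster subnet 10.244.0.0/16, the code below
+ // ends up creating a logical router policy on ovn_cluster_router roughly like:
+ //   priority: 501 (types.HybridOverlayReroutePriority)
+ //   match:    inport == "rtos-node1" && ip4.src == $<hybrid-route-addrset-hash> && ip4.dst != 10.244.0.0/16
+ //   action:   reroute, nexthop: node1's gateway-router join-switch IP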
+ asIndex := getHybridRouteAddrSetDbIDs(node, controllerName) + as, err := nb.addressSetFactory.EnsureAddressSet(asIndex) + if err != nil { + return fmt.Errorf("cannot ensure that addressSet for node %s exists %v", node, err) + } + err = as.AddIPs([]net.IP{(podIP)}) + if err != nil { + return fmt.Errorf("unable to add PodIP %s: to the address set %s, err: %v", podIP.String(), node, err) + } + + // add allow policy to bypass lr-policy in GR + ipv4HashedAS, ipv6HashedAS := as.GetASHashNames() + var l3Prefix string + var matchSrcAS string + isIPv6 := utilnet.IsIPv6(podIP) + if isIPv6 { + l3Prefix = "ip6" + matchSrcAS = ipv6HashedAS + } else { + l3Prefix = "ip4" + matchSrcAS = ipv4HashedAS + } + + // get the GR to join switch ip address + grJoinIfAddrs, err := util.GetLRPAddrs(nb.nbClient, types.GWRouterToJoinSwitchPrefix+types.GWRouterPrefix+node) + if err != nil { + return fmt.Errorf("unable to find IP address for node: %s, %s port, err: %v", node, types.GWRouterToJoinSwitchPrefix, err) + } + grJoinIfAddr, err := util.MatchFirstIPNetFamily(utilnet.IsIPv6(podIP), grJoinIfAddrs) + if err != nil { + return fmt.Errorf("failed to match gateway router join interface IPs: %v, err: %v", grJoinIfAddr, err) + } + + var matchDst string + var clusterL3Prefix string + for _, clusterSubnet := range config.Default.ClusterSubnets { + if utilnet.IsIPv6CIDR(clusterSubnet.CIDR) { + clusterL3Prefix = "ip6" + } else { + clusterL3Prefix = "ip4" + } + if l3Prefix != clusterL3Prefix { + continue + } + matchDst += fmt.Sprintf(" && %s.dst != %s", clusterL3Prefix, clusterSubnet.CIDR) + } + + // traffic destined outside of cluster subnet go to GR + matchStr := fmt.Sprintf(`inport == "%s%s" && %s.src == $%s`, types.RouterToSwitchPrefix, node, l3Prefix, matchSrcAS) + matchStr += matchDst + + logicalRouterPolicy := nbdb.LogicalRouterPolicy{ + Priority: types.HybridOverlayReroutePriority, + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{grJoinIfAddr.IP.String()}, + Match: matchStr, + } + p := func(item *nbdb.LogicalRouterPolicy) bool { + return item.Priority == logicalRouterPolicy.Priority && strings.Contains(item.Match, matchSrcAS) + } + err = libovsdbops.CreateOrUpdateLogicalRouterPolicyWithPredicate(nb.nbClient, types.OVNClusterRouter, + &logicalRouterPolicy, p, &logicalRouterPolicy.Nexthops, &logicalRouterPolicy.Match, &logicalRouterPolicy.Action) + if err != nil { + return fmt.Errorf("failed to add policy route %+v to %s: %v", logicalRouterPolicy, types.OVNClusterRouter, err) + } + } + return nil +} + +func (nb *northBoundClient) createOrUpdateBFDStaticRoute(bfdEnabled bool, gw string, podIP, gr, port, mask string) error { + lrsr := nbdb.LogicalRouterStaticRoute{ + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + Nexthop: gw, + IPPrefix: podIP + mask, + OutputPort: &port, + } + + ops := []ovsdb.Operation{} + var err error + if bfdEnabled { + bfd := nbdb.BFD{ + DstIP: gw, + LogicalPort: port, + } + ops, err = libovsdbops.CreateOrUpdateBFDOps(nb.nbClient, ops, &bfd) + if err != nil { + return fmt.Errorf("error creating or updating BFD %+v: %v", bfd, err) + } + lrsr.BFD = &bfd.UUID + } + + p := func(item *nbdb.LogicalRouterStaticRoute) bool { + return item.IPPrefix == lrsr.IPPrefix && + item.Nexthop == lrsr.Nexthop && + item.OutputPort != nil && + *item.OutputPort == *lrsr.OutputPort && + item.Policy == lrsr.Policy + } + ops, err = libovsdbops.CreateOrUpdateLogicalRouterStaticRoutesWithPredicateOps(nb.nbClient, ops, gr, &lrsr, 
p, + &lrsr.Options) + if err != nil { + return fmt.Errorf("error creating or updating static route %+v on router %s: %v", lrsr, gr, err) + } + + _, err = libovsdbops.TransactAndCheck(nb.nbClient, ops) + if err != nil { + return fmt.Errorf("error transacting static route: %v", err) + } + + return nil +} + +func (nb *northBoundClient) updateExternalGWInfoCacheForPodIPWithGatewayIP(podIP, gwIP, nodeName string, bfdEnabled bool, namespacedName ktypes.NamespacedName) error { + gr := util.GetGatewayRouterFromNode(nodeName) + routeInfo, err := nb.ensureRouteInfoLocked(namespacedName) + if err != nil { + return fmt.Errorf("failed to ensure routeInfo for %s, error: %v", namespacedName.Name, err) + } + defer routeInfo.Unlock() + // if route was already programmed, skip it + if foundGR, ok := routeInfo.PodExternalRoutes[podIP][gwIP]; ok && foundGR == gr { + return nil + } + mask := util.GetIPFullMask(podIP) + + portPrefix, err := nb.extSwitchPrefix(nodeName) + if err != nil { + klog.Infof("Failed to find ext switch prefix for %s %v", nodeName, err) + return err + } + if bfdEnabled { + port := portPrefix + types.GWRouterToExtSwitchPrefix + gr + // update the BFD static route just in case it has changed + if err := nb.createOrUpdateBFDStaticRoute(bfdEnabled, gwIP, podIP, gr, port, mask); err != nil { + return err + } + } else { + _, err := nb.lookupBFDEntry(gwIP, gr, portPrefix) + if err != nil { + err = nb.cleanUpBFDEntry(gwIP, gr, portPrefix) + if err != nil { + return err + } + } + } + + if routeInfo.PodExternalRoutes[podIP] == nil { + routeInfo.PodExternalRoutes[podIP] = make(map[string]string) + } + routeInfo.PodExternalRoutes[podIP][gwIP] = gr + + return nil +} + +// ensureRouteInfoLocked either gets the current routeInfo in the cache with a lock, or creates+locks a new one if missing +func (nb *northBoundClient) ensureRouteInfoLocked(podName ktypes.NamespacedName) (*ExternalRouteInfo, error) { + // We don't want to hold the cache lock while we try to lock the routeInfo (unless we are creating it, then we know + // no one else is using it). This could lead to dead lock. Therefore the steps here are: + // 1. Get the cache lock, try to find the routeInfo + // 2. If routeInfo existed, release the cache lock + // 3. If routeInfo did not exist, safe to hold the cache lock while we create the new routeInfo + nb.exGWCacheMutex.Lock() + routeInfo, ok := nb.externalGWCache[podName] + if !ok { + routeInfo = &ExternalRouteInfo{ + PodExternalRoutes: make(map[string]map[string]string), + PodName: podName, + } + // we are creating routeInfo and going to set it in podExternalRoutes map + // so safe to hold the lock while we create and add it + defer nb.exGWCacheMutex.Unlock() + nb.externalGWCache[podName] = routeInfo + } else { + // if we found an existing routeInfo, do not hold the cache lock + // while waiting for routeInfo to Lock + nb.exGWCacheMutex.Unlock() + } + + // 4. Now lock the routeInfo + routeInfo.Lock() + + // 5. 
If routeInfo was deleted between releasing the cache lock and grabbing + // the routeInfo lock, return an error so the caller doesn't use it and + // retries the operation later + if routeInfo.Deleted { + routeInfo.Unlock() + return nil, fmt.Errorf("routeInfo for pod %s, was altered during ensure route info", podName) + } + + return routeInfo, nil +} + +func (nb *northBoundClient) deletePodGWRoute(routeInfo *ExternalRouteInfo, podIP, gw, gr string) error { + if utilnet.IsIPv6String(gw) != utilnet.IsIPv6String(podIP) { + return nil + } + + mask := util.GetIPFullMask(podIP) + if err := nb.deleteLogicalRouterStaticRoute(podIP, mask, gw, gr); err != nil { + return fmt.Errorf("unable to delete pod %s ECMP route to GR %s, GW: %s: %w", + routeInfo.PodName, gr, gw, err) + } + + node := util.GetWorkerFromGatewayRouter(gr) + // The gw is deleted from the routes cache after this func is called, length 1 + // means it is the last gw for the pod and the hybrid route policy should be deleted. + if entry := routeInfo.PodExternalRoutes[podIP]; len(entry) == 1 { + if err := nb.delHybridRoutePolicyForPod(net.ParseIP(podIP), node); err != nil { + return fmt.Errorf("unable to delete hybrid route policy for pod %s: err: %v", routeInfo.PodName, err) + } + } + + portPrefix, err := nb.extSwitchPrefix(node) + if err != nil { + return err + } + return nb.cleanUpBFDEntry(gw, gr, portPrefix) +} + +// cleanUpBFDEntry checks if the BFD table entry related to the associated +// gw router / port / gateway ip is referenced by other routing rules, and if +// not removes the entry to avoid having dangling BFD entries. +func (nb *northBoundClient) cleanUpBFDEntry(gatewayIP, gatewayRouter, prefix string) error { + portName := prefix + types.GWRouterToExtSwitchPrefix + gatewayRouter + p := func(item *nbdb.LogicalRouterStaticRoute) bool { + if item.OutputPort != nil && *item.OutputPort == portName && item.Nexthop == gatewayIP && item.BFD != nil && *item.BFD != "" { + return true + } + return false + } + logicalRouterStaticRoutes, err := libovsdbops.FindLogicalRouterStaticRoutesWithPredicate(nb.nbClient, p) + if err != nil { + return fmt.Errorf("cleanUpBFDEntry failed to list routes for %s: %w", portName, err) + } + if len(logicalRouterStaticRoutes) > 0 { + return nil + } + + bfd := nbdb.BFD{ + LogicalPort: portName, + DstIP: gatewayIP, + } + err = libovsdbops.DeleteBFDs(nb.nbClient, &bfd) + if err != nil { + return fmt.Errorf("error deleting BFD %+v: %v", bfd, err) + } + + return nil +} + +func (nb *northBoundClient) deleteLogicalRouterStaticRoute(podIP, mask, gw, gr string) error { + p := func(item *nbdb.LogicalRouterStaticRoute) bool { + return item.Policy != nil && + *item.Policy == nbdb.LogicalRouterStaticRoutePolicySrcIP && + item.IPPrefix == podIP+mask && + item.Nexthop == gw + } + err := libovsdbops.DeleteLogicalRouterStaticRoutesWithPredicate(nb.nbClient, gr, p) + if err != nil { + return fmt.Errorf("error deleting static route from router %s: %v", gr, err) + } + + return nil +} + +// DelHybridRoutePolicyForPod handles deleting a logical route policy that +// forces pod egress traffic to be rerouted to a gateway router for local gateway mode. +func (nb *northBoundClient) delHybridRoutePolicyForPod(podIP net.IP, node string) error { + if config.Gateway.Mode == config.GatewayModeLocal { + // Delete podIP from the node's address_set. 
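+ // The pod IP is removed from the per-node address set first; if that leaves
+ // the address set empty for this IP family, the matching 501 reroute policy
+ // is deleted, and once both families are empty the address set itself is
+ // destroyed.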
+ asIndex := getHybridRouteAddrSetDbIDs(node, controllerName) + as, err := nb.addressSetFactory.EnsureAddressSet(asIndex) + if err != nil { + return fmt.Errorf("cannot Ensure that addressSet for node %s exists %v", node, err) + } + err = as.DeleteIPs([]net.IP{(podIP)}) + if err != nil { + return fmt.Errorf("unable to remove PodIP %s: to the address set %s, err: %v", podIP.String(), node, err) + } + + // delete hybrid policy to bypass lr-policy in GR, only if there are zero pods on this node. + ipv4HashedAS, ipv6HashedAS := as.GetASHashNames() + ipv4PodIPs, ipv6PodIPs := as.GetIPs() + deletePolicy := false + var l3Prefix string + var matchSrcAS string + if utilnet.IsIPv6(podIP) { + l3Prefix = "ip6" + if len(ipv6PodIPs) == 0 { + deletePolicy = true + } + matchSrcAS = ipv6HashedAS + } else { + l3Prefix = "ip4" + if len(ipv4PodIPs) == 0 { + deletePolicy = true + } + matchSrcAS = ipv4HashedAS + } + if deletePolicy { + var matchDst string + var clusterL3Prefix string + for _, clusterSubnet := range config.Default.ClusterSubnets { + if utilnet.IsIPv6CIDR(clusterSubnet.CIDR) { + clusterL3Prefix = "ip6" + } else { + clusterL3Prefix = "ip4" + } + if l3Prefix != clusterL3Prefix { + continue + } + matchDst += fmt.Sprintf(" && %s.dst != %s", l3Prefix, clusterSubnet.CIDR) + } + matchStr := fmt.Sprintf(`inport == "%s%s" && %s.src == $%s`, types.RouterToSwitchPrefix, node, l3Prefix, matchSrcAS) + matchStr += matchDst + + p := func(item *nbdb.LogicalRouterPolicy) bool { + return item.Priority == types.HybridOverlayReroutePriority && item.Match == matchStr + } + err := libovsdbops.DeleteLogicalRouterPoliciesWithPredicate(nb.nbClient, types.OVNClusterRouter, p) + if err != nil { + return fmt.Errorf("error deleting policy %s on router %s: %v", matchStr, types.OVNClusterRouter, err) + } + } + if len(ipv4PodIPs) == 0 && len(ipv6PodIPs) == 0 { + // delete address set. + err := as.Destroy() + if err != nil { + return fmt.Errorf("failed to remove address set: %s, on: %s, err: %v", + as.GetName(), node, err) + } + } + } + return nil +} + +// extSwitchPrefix returns the prefix of the external switch to use for +// external gateway routes. In case no second bridge is configured, we +// use the default one and the prefix is empty. 
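+// For example (assumed setup, for illustration only): a node whose parsed L3
+// gateway annotation carries a non-empty EgressGWInterfaceID has its external
+// gateway routes programmed via the egress GW external switch, so this returns
+// types.EgressGWSwitchPrefix; otherwise it returns "" and the default external
+// switch port is used.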
+func (nb *northBoundClient) extSwitchPrefix(nodeName string) (string, error) { + node, err := nb.nodeLister.Get(nodeName) + if err != nil { + return "", errors.Wrapf(err, "extSwitchPrefix: failed to find node %s", nodeName) + } + l3GatewayConfig, err := util.ParseNodeL3GatewayAnnotation(node) + if err != nil { + return "", errors.Wrapf(err, "extSwitchPrefix: failed to parse l3 gateway annotation for node %s", nodeName) + } + + if l3GatewayConfig.EgressGWInterfaceID != "" { + return types.EgressGWSwitchPrefix, nil + } + return "", nil +} + +func (nb *northBoundClient) lookupBFDEntry(gatewayIP, gatewayRouter, prefix string) (*nbdb.BFD, error) { + portName := prefix + types.GWRouterToExtSwitchPrefix + gatewayRouter + bfd := nbdb.BFD{ + LogicalPort: portName, + DstIP: gatewayIP, + } + found, err := libovsdbops.LookupBFD(nb.nbClient, &bfd) + if err != nil { + klog.Warningf("Failed to lookup BFD for gateway IP %s, gateway router %s and prefix %s", gatewayIP, gatewayRouter, prefix) + return nil, err + } + + return found, nil +} + +// buildPodSNAT builds per pod SNAT rules towards the nodeIP that are applied to the GR where the pod resides +// if allSNATs flag is set, then all the SNATs (including against egressIPs if any) for that pod will be returned +func buildPodSNAT(extIPs, podIPNets []*net.IPNet) ([]*nbdb.NAT, error) { + nats := make([]*nbdb.NAT, 0, len(extIPs)*len(podIPNets)) + var nat *nbdb.NAT + + for _, podIPNet := range podIPNets { + podIP := podIPNet.IP.String() + mask := util.GetIPFullMask(podIP) + _, fullMaskPodNet, err := net.ParseCIDR(podIP + mask) + if err != nil { + return nil, fmt.Errorf("invalid IP: %s and mask: %s combination, error: %v", podIP, mask, err) + } + if len(extIPs) == 0 { + nat = libovsdbops.BuildSNAT(nil, fullMaskPodNet, "", nil) + } else { + for _, gwIPNet := range extIPs { + gwIP := gwIPNet.IP.String() + if utilnet.IsIPv6String(gwIP) != utilnet.IsIPv6String(podIP) { + continue + } + nat = libovsdbops.BuildSNAT(&gwIPNet.IP, fullMaskPodNet, "", nil) + } + } + nats = append(nats, nat) + } + return nats, nil +} + +func getHybridRouteAddrSetDbIDs(nodeName, controller string) *libovsdbops.DbObjectIDs { + return libovsdbops.NewDbObjectIDs(libovsdbops.AddressSetHybridNodeRoute, controller, + map[libovsdbops.ExternalIDKey]string{ + // there is only 1 address set of this type per node + libovsdbops.ObjectNameKey: nodeName, + }) +} + +func (c *conntrackClient) deleteGatewayIPs(namespaceName string, _, toBeKept sets.Set[string]) error { + // loop through all the IPs on the annotations; ARP for their MACs and form an allowlist + var wg sync.WaitGroup + wg.Add(len(toBeKept)) + validMACs := sync.Map{} + klog.Infof("Keeping conntrack entries in namespace %s with gateway IPs %s", namespaceName, strings.Join(sets.List(toBeKept), ",")) + for gwIP := range toBeKept { + go func(gwIP string) { + defer wg.Done() + if len(gwIP) > 0 && !utilnet.IsIPv6String(gwIP) { + // TODO: Add support for IPv6 external gateways + if hwAddr, err := util.GetMACAddressFromARP(net.ParseIP(gwIP)); err != nil { + klog.Errorf("Failed to lookup hardware address for gatewayIP %s: %v", gwIP, err) + } else if len(hwAddr) > 0 { + // we need to reverse the mac before passing it to the conntrack filter since OVN saves the MAC in the following format + // +------------------------------------------------------------ + + // | 128 ... 112 ... 96 ... 80 ... 64 ... 48 ... 32 ... 16 ... 
0| + // +------------------+-------+--------------------+-------------| + // | | UNUSED| MAC ADDRESS | UNUSED | + // +------------------+-------+--------------------+-------------+ + for i, j := 0, len(hwAddr)-1; i < j; i, j = i+1, j-1 { + hwAddr[i], hwAddr[j] = hwAddr[j], hwAddr[i] + } + validMACs.Store(gwIP, []byte(hwAddr)) + } + } + }(gwIP) + } + wg.Wait() + + validNextHopMACs := [][]byte{} + validMACs.Range(func(key interface{}, value interface{}) bool { + validNextHopMACs = append(validNextHopMACs, value.([]byte)) + return true + }) + // Handle corner case where there are 0 IPs on the annotations OR none of the ARPs were successful; i.e allowMACList={empty}. + // This means we *need to* pass a label > 128 bits that will not match on any conntrack entry labels for these pods. + // That way any remaining entries with labels having MACs set will get purged. + if len(validNextHopMACs) == 0 { + validNextHopMACs = append(validNextHopMACs, []byte("does-not-contain-anything")) + } + + pods, err := c.podLister.List(labels.Everything()) + if err != nil { + return fmt.Errorf("unable to get pods from informer: %v", err) + } + + var errors []error + for _, pod := range pods { + pod := pod + podIPs, err := util.GetPodIPsOfNetwork(pod, &util.DefaultNetInfo{}) + if err != nil { + errors = append(errors, fmt.Errorf("unable to fetch IP for pod %s/%s: %v", pod.Namespace, pod.Name, err)) + } + for _, podIP := range podIPs { // flush conntrack only for UDP + // for this pod, we check if the conntrack entry has a label that is not in the provided allowlist of MACs + // only caveat here is we assume egressGW served pods shouldn't have conntrack entries with other labels set + err := util.DeleteConntrack(podIP.String(), 0, v1.ProtocolUDP, netlink.ConntrackOrigDstIP, validNextHopMACs) + if err != nil { + errors = append(errors, fmt.Errorf("failed to delete conntrack entry for pod with IP %s: %v", podIP.String(), err)) + continue + } + } + } + return kerrors.NewAggregate(errors) +} + +// addGatewayIPs is a NOP (no operation) in the conntrack client as it does not add any entry to the conntrack table. 
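+// Gateway additions need no conntrack changes; stale conntrack entries are only
+// flushed when gateways are removed (see deleteGatewayIPs above).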
+func (c *conntrackClient) addGatewayIPs(pod *v1.Pod, egress gatewayInfoList) error { + return nil +} diff --git a/go-controller/pkg/ovn/controller/apbroute/node_controller.go b/go-controller/pkg/ovn/controller/apbroute/node_controller.go new file mode 100644 index 0000000000..8f0d9fd101 --- /dev/null +++ b/go-controller/pkg/ovn/controller/apbroute/node_controller.go @@ -0,0 +1,496 @@ +package apbroute + +import ( + "fmt" + "reflect" + "sync" + "time" + + v1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + ktypes "k8s.io/apimachinery/pkg/types" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/apimachinery/pkg/util/wait" + coreinformers "k8s.io/client-go/informers/core/v1" + corev1listers "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/util/workqueue" + "k8s.io/klog/v2" + + nettypes "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + adminpolicybasedrouteapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + adminpolicybasedrouteclient "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned" + adminpolicybasedrouteinformer "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions" + + adminpolicybasedroutelisters "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/listers/adminpolicybasedroute/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" +) + +// Admin Policy Based Route Node controller + +type ExternalGatewayNodeController struct { + stopCh <-chan struct{} + + // route policies + + // routerInformer v1apbinformer.AdminPolicyBasedExternalRouteInformer + routeLister adminpolicybasedroutelisters.AdminPolicyBasedExternalRouteLister + routeSynced cache.InformerSynced + routeQueue workqueue.RateLimitingInterface + + // Pods + podLister corev1listers.PodLister + podSynced cache.InformerSynced + podQueue workqueue.RateLimitingInterface + + // Namespaces + namespaceQueue workqueue.RateLimitingInterface + namespaceLister corev1listers.NamespaceLister + namespaceSynced cache.InformerSynced + + //external gateway caches + //make them public so that they can be used by the annotation logic to lock on namespaces and share the same external route information + ExternalGWCache map[ktypes.NamespacedName]*ExternalRouteInfo + ExGWCacheMutex *sync.RWMutex + + routePolicyInformer adminpolicybasedrouteinformer.SharedInformerFactory + + mgr *externalPolicyManager +} + +func NewExternalNodeController( + apbRoutePolicyClient adminpolicybasedrouteclient.Interface, + podInformer coreinformers.PodInformer, + namespaceInformer coreinformers.NamespaceInformer, + stopCh <-chan struct{}, +) (*ExternalGatewayNodeController, error) { + + namespaceLister := namespaceInformer.Lister() + routePolicyInformer := adminpolicybasedrouteinformer.NewSharedInformerFactory(apbRoutePolicyClient, resyncInterval) + externalRouteInformer := routePolicyInformer.K8s().V1().AdminPolicyBasedExternalRoutes() + + c := &ExternalGatewayNodeController{ + stopCh: stopCh, + routePolicyInformer: routePolicyInformer, + routeLister: routePolicyInformer.K8s().V1().AdminPolicyBasedExternalRoutes().Lister(), + routeSynced: routePolicyInformer.K8s().V1().AdminPolicyBasedExternalRoutes().Informer().HasSynced, + routeQueue: workqueue.NewNamedRateLimitingQueue( + 
workqueue.NewItemFastSlowRateLimiter(1*time.Second, 5*time.Second, 5), + "apbexternalroutes", + ), + podLister: podInformer.Lister(), + podSynced: podInformer.Informer().HasSynced, + podQueue: workqueue.NewNamedRateLimitingQueue( + workqueue.NewItemFastSlowRateLimiter(1*time.Second, 5*time.Second, 5), + "apbexternalroutepods", + ), + namespaceLister: namespaceLister, + namespaceSynced: namespaceInformer.Informer().HasSynced, + namespaceQueue: workqueue.NewNamedRateLimitingQueue( + workqueue.NewItemFastSlowRateLimiter(1*time.Second, 5*time.Second, 5), + "apbexternalroutenamespaces", + ), + mgr: newExternalPolicyManager( + stopCh, + podInformer.Lister(), + namespaceInformer.Lister(), + routePolicyInformer.K8s().V1().AdminPolicyBasedExternalRoutes().Lister(), + &conntrackClient{podLister: podInformer.Lister()}), + } + + _, err := namespaceInformer.Informer().AddEventHandler( + factory.WithUpdateHandlingForObjReplace(cache.ResourceEventHandlerFuncs{ + AddFunc: c.onNamespaceAdd, + UpdateFunc: c.onNamespaceUpdate, + DeleteFunc: c.onNamespaceDelete, + })) + if err != nil { + return nil, err + } + + _, err = podInformer.Informer().AddEventHandler( + factory.WithUpdateHandlingForObjReplace(cache.ResourceEventHandlerFuncs{ + AddFunc: c.onPodAdd, + UpdateFunc: c.onPodUpdate, + DeleteFunc: c.onPodDelete, + })) + if err != nil { + return nil, err + } + _, err = externalRouteInformer.Informer().AddEventHandler( + factory.WithUpdateHandlingForObjReplace(cache.ResourceEventHandlerFuncs{ + AddFunc: c.onPolicyAdd, + UpdateFunc: c.onPolicyUpdate, + DeleteFunc: c.onPolicyDelete, + })) + if err != nil { + return nil, err + } + + return c, nil + +} + +func (c *ExternalGatewayNodeController) Run(threadiness int) { + defer utilruntime.HandleCrash() + klog.Infof("Starting Admin Policy Based Route Node Controller") + + c.routePolicyInformer.Start(c.stopCh) + + if !cache.WaitForNamedCacheSync("apbexternalroutenamespaces", c.stopCh, c.namespaceSynced) { + utilruntime.HandleError(fmt.Errorf("timed out waiting for caches to sync")) + klog.Infof("Synchronization failed") + return + } + + if !cache.WaitForNamedCacheSync("apbexternalroutepods", c.stopCh, c.podSynced) { + utilruntime.HandleError(fmt.Errorf("timed out waiting for caches to sync")) + klog.Infof("Synchronization failed") + return + } + + if !cache.WaitForNamedCacheSync("adminpolicybasedexternalroutes", c.stopCh, c.routeSynced) { + utilruntime.HandleError(fmt.Errorf("timed out waiting for caches to sync")) + klog.Infof("Synchronization failed") + return + } + + wg := &sync.WaitGroup{} + for i := 0; i < threadiness; i++ { + wg.Add(1) + go func() { + defer wg.Done() + wait.Until(func() { + // processes route policies + c.runPolicyWorker(wg) + }, time.Second, c.stopCh) + }() + } + + for i := 0; i < threadiness; i++ { + wg.Add(1) + go func() { + defer wg.Done() + wait.Until(func() { + // detects gateway pod changes and updates the pod's IP and MAC in the northbound DB + c.runPodWorker(wg) + }, time.Second, c.stopCh) + }() + } + + for i := 0; i < threadiness; i++ { + wg.Add(1) + go func() { + defer wg.Done() + wait.Until(func() { + // detects namespace changes and applies polices that match the namespace selector in the `From` policy field + c.runNamespaceWorker(wg) + }, time.Second, c.stopCh) + }() + } + + // wait until we're told to stop + <-c.stopCh + + c.podQueue.ShutDown() + c.routeQueue.ShutDown() + c.namespaceQueue.ShutDown() + + wg.Wait() + +} + +func (c *ExternalGatewayNodeController) onNamespaceAdd(obj interface{}) { + c.namespaceQueue.Add(obj) +} + +func 
(c *ExternalGatewayNodeController) onNamespaceUpdate(oldObj, newObj interface{}) { + oldNamespace := oldObj.(*v1.Namespace) + newNamespace := newObj.(*v1.Namespace) + + if oldNamespace.ResourceVersion == newNamespace.ResourceVersion || !newNamespace.GetDeletionTimestamp().IsZero() { + return + } + c.namespaceQueue.Add(newObj) +} + +func (c *ExternalGatewayNodeController) onNamespaceDelete(obj interface{}) { + c.namespaceQueue.Add(obj) +} + +func (c *ExternalGatewayNodeController) runPolicyWorker(wg *sync.WaitGroup) { + for c.processNextPolicyWorkItem(wg) { + } +} + +func (c *ExternalGatewayNodeController) processNextPolicyWorkItem(wg *sync.WaitGroup) bool { + wg.Add(1) + defer wg.Done() + + obj, shutdown := c.routeQueue.Get() + + if shutdown { + return false + } + + defer c.routeQueue.Done(obj) + + item := obj.(*adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) + klog.Infof("Processing policy %s", item.Name) + err := c.syncRoutePolicy(item) + if err != nil { + if c.routeQueue.NumRequeues(item) < maxRetries { + klog.V(2).InfoS("Error found while processing policy: %v", err.Error()) + c.routeQueue.AddRateLimited(item) + return true + } + klog.Warningf("Dropping policy %q out of the queue: %v", item.Name, err) + utilruntime.HandleError(err) + } + c.routeQueue.Forget(obj) + return true +} + +func (c *ExternalGatewayNodeController) syncRoutePolicy(routePolicy *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) error { + _, err := c.routeLister.Get(routePolicy.Name) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + if apierrors.IsNotFound(err) { + // DELETE use case + klog.Infof("Deleting policy %s", routePolicy.Name) + err := c.mgr.processDeletePolicy(routePolicy.Name) + if err != nil { + return fmt.Errorf("failed to delete Admin Policy Based External Route %s:%w", routePolicy.Name, err) + } + klog.Infof("Policy %s deleted", routePolicy.Name) + return nil + } + currentPolicy, found, markedForDeletion := c.mgr.getRoutePolicyFromCache(routePolicy.Name) + if markedForDeletion { + klog.Warningf("Attempting to add or update route policy %s when it has been marked for deletion. 
Skipping...", routePolicy.Name)
+ return nil
+ }
+ if !found {
+ // ADD use case
+ klog.Infof("Adding policy %s", routePolicy.Name)
+ _, err := c.mgr.processAddPolicy(routePolicy)
+ if err != nil {
+ return fmt.Errorf("failed to create Admin Policy Based External Route %s:%w", routePolicy.Name, err)
+ }
+ return nil
+ }
+ // UPDATE use case
+ klog.Infof("Updating policy %s", routePolicy.Name)
+ _, err = c.mgr.processUpdatePolicy(&currentPolicy, routePolicy)
+ if err != nil {
+ return fmt.Errorf("failed to update Admin Policy Based External Route %s:%w", routePolicy.Name, err)
+ }
+ return nil
+}
+
+func (c *ExternalGatewayNodeController) onPolicyAdd(obj interface{}) {
+ c.routeQueue.Add(obj)
+}
+
+func (c *ExternalGatewayNodeController) onPolicyUpdate(oldObj, newObj interface{}) {
+ oldRoutePolicy := oldObj.(*adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute)
+ newRoutePolicy := newObj.(*adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute)
+
+ if oldRoutePolicy.Generation == newRoutePolicy.Generation ||
+ !newRoutePolicy.GetDeletionTimestamp().IsZero() {
+ return
+ }
+
+ c.routeQueue.Add(newObj)
+}
+
+func (c *ExternalGatewayNodeController) onPolicyDelete(obj interface{}) {
+ c.routeQueue.Add(obj)
+}
+
+func (c *ExternalGatewayNodeController) runNamespaceWorker(wg *sync.WaitGroup) {
+ for c.processNextNamespaceWorkItem(wg) {
+
+ }
+}
+
+func (c *ExternalGatewayNodeController) processNextNamespaceWorkItem(wg *sync.WaitGroup) bool {
+ wg.Add(1)
+ defer wg.Done()
+
+ obj, shutdown := c.namespaceQueue.Get()
+
+ if shutdown {
+ return false
+ }
+
+ defer c.namespaceQueue.Done(obj)
+
+ err := c.syncNamespace(obj.(*v1.Namespace))
+ if err != nil {
+ if c.namespaceQueue.NumRequeues(obj) < maxRetries {
+ klog.V(2).Infof("Error found while processing namespace %s: %v", obj.(*v1.Namespace).Name, err)
+ c.namespaceQueue.AddRateLimited(obj)
+ return true
+ }
+ klog.Warningf("Dropping namespace %q out of the queue: %v", obj.(*v1.Namespace).Name, err)
+ utilruntime.HandleError(err)
+ }
+ c.namespaceQueue.Forget(obj)
+ return true
+}
+
+func (c *ExternalGatewayNodeController) syncNamespace(namespace *v1.Namespace) error {
+ _, err := c.namespaceLister.Get(namespace.Name)
+ if err != nil && !apierrors.IsNotFound(err) {
+ return err
+ }
+ if apierrors.IsNotFound(err) || !namespace.DeletionTimestamp.IsZero() {
+ // DELETE use case
+
+ klog.Infof("Deleting namespace reference %s", namespace.Name)
+ _, found := c.mgr.getNamespaceInfoFromCache(namespace.Name)
+ if !found {
+ // namespace is not a recipient for policies
+ return nil
+ }
+ c.mgr.deleteNamespaceInfoInCache(namespace.Name)
+ c.mgr.unlockNamespaceInfoCache(namespace.Name)
+ return nil
+ }
+ matches, err := c.mgr.getPoliciesForNamespace(namespace.Name)
+ if err != nil {
+ return err
+ }
+ cacheInfo, found := c.mgr.getNamespaceInfoFromCache(namespace.Name)
+ if !found && len(matches) == 0 {
+ // it's not a namespace being cached already and it is not a target for policies, nothing to do
+ return nil
+ }
+ if !found {
+ // ADD use case
+ // new namespace, or a namespace whose updated labels now match a routing policy
+ defer c.mgr.unlockNamespaceInfoCache(namespace.Name)
+ cacheInfo = c.mgr.newNamespaceInfoInCache(namespace.Name)
+ cacheInfo.policies = matches
+ return c.mgr.processAddNamespace(namespace, cacheInfo)
+ }
+
+ if !cacheInfo.policies.Equal(matches) {
+ // UPDATE use case
+ // policies differ, need to reconcile them
+ defer c.mgr.unlockNamespaceInfoCache(namespace.Name)
+ err = c.mgr.processUpdateNamespace(namespace.Name, cacheInfo.policies,
matches, cacheInfo) + if err != nil { + return err + } + if cacheInfo.policies.Len() == 0 { + c.mgr.deleteNamespaceInfoInCache(namespace.Name) + } + return nil + } + c.mgr.unlockNamespaceInfoCache(namespace.Name) + return nil + +} + +func (c *ExternalGatewayNodeController) onPodAdd(obj interface{}) { + o := obj.(*v1.Pod) + // if the pod does not have IPs AND there are no multus network status annotations found, skip it + if len(o.Status.PodIPs) == 0 && len(o.Annotations[nettypes.NetworkStatusAnnot]) == 0 { + return + } + c.podQueue.Add(obj) +} + +func (c *ExternalGatewayNodeController) onPodUpdate(oldObj, newObj interface{}) { + o := oldObj.(*v1.Pod) + n := newObj.(*v1.Pod) + + // if labels AND assigned Pod IPs AND the multus network status annotations are the same, skip processing changes to the pod. + if reflect.DeepEqual(o.Labels, n.Labels) && + reflect.DeepEqual(o.Status.PodIPs, n.Status.PodIPs) && + reflect.DeepEqual(o.Annotations[nettypes.NetworkStatusAnnot], n.Annotations[nettypes.NetworkStatusAnnot]) { + return + } + c.podQueue.Add(newObj) +} + +func (c *ExternalGatewayNodeController) onPodDelete(obj interface{}) { + c.podQueue.Add(obj) +} + +func (c *ExternalGatewayNodeController) runPodWorker(wg *sync.WaitGroup) { + for c.processNextPodWorkItem(wg) { + } +} + +func (c *ExternalGatewayNodeController) processNextPodWorkItem(wg *sync.WaitGroup) bool { + wg.Add(1) + defer wg.Done() + + obj, shutdown := c.podQueue.Get() + + if shutdown { + return false + } + + defer c.podQueue.Done(obj) + + p := obj.(*v1.Pod) + err := c.syncPod(p) + if err != nil { + if c.podQueue.NumRequeues(obj) < maxRetries { + klog.V(2).InfoS("Error found while processing pod %s/%s:%w", p.Namespace, p.Name, err) + c.podQueue.AddRateLimited(obj) + return true + } + klog.Warningf("Dropping pod %s/%s out of the queue: %s", p.Namespace, p.Name, err) + utilruntime.HandleError(err) + } + + c.podQueue.Forget(obj) + return true +} + +func (c *ExternalGatewayNodeController) syncPod(pod *v1.Pod) error { + + _, err := c.podLister.Pods(pod.Namespace).Get(pod.Name) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + namespaces := c.mgr.filterNamespacesUsingPodGateway(ktypes.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}) + klog.Infof("Processing pod reference %s/%s", pod.Namespace, pod.Name) + if apierrors.IsNotFound(err) || !pod.DeletionTimestamp.IsZero() { + // DELETE case + if namespaces.Len() == 0 { + // nothing to do, this pod is not a gateway pod + return nil + } + klog.Infof("Deleting pod gateway %s/%s", pod.Namespace, pod.Name) + return c.mgr.processDeletePod(pod, namespaces) + } + if namespaces.Len() == 0 { + // ADD case: new pod or existing pod that is not a gateway pod and could now be one. 
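+ // processAddPod re-evaluates the policies' dynamic gateway selectors against
+ // this pod and updates the namespaces that use it as an external gateway.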
+ klog.Infof("Adding pod reference %s/%s", pod.Namespace, pod.Name) + return c.mgr.processAddPod(pod) + } + // UPDATE case + klog.Infof("Updating pod gateway %s/%s", pod.Namespace, pod.Name) + return c.mgr.processUpdatePod(pod, namespaces) +} + +func (c *ExternalGatewayNodeController) GetAdminPolicyBasedExternalRouteIPsForTargetNamespace(namespaceName string) (sets.Set[string], error) { + gwIPs, err := c.mgr.getDynamicGatewayIPsForTargetNamespace(namespaceName) + if err != nil { + return nil, err + } + tmpIPs, err := c.mgr.getStaticGatewayIPsForTargetNamespace(namespaceName) + if err != nil { + return nil, err + } + + return gwIPs.Union(tmpIPs), nil +} diff --git a/go-controller/pkg/ovn/controller/apbroute/repair.go b/go-controller/pkg/ovn/controller/apbroute/repair.go new file mode 100644 index 0000000000..e1db71fd15 --- /dev/null +++ b/go-controller/pkg/ovn/controller/apbroute/repair.go @@ -0,0 +1,367 @@ +package apbroute + +import ( + "net" + "strings" + "time" + + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/labels" + ktypes "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/klog/v2" + utilnet "k8s.io/utils/net" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" +) + +type managedGWIPs struct { + namespacedName ktypes.NamespacedName + nodeName string + gwList gatewayInfoList +} + +func (c *ExternalGatewayMasterController) repair() { + start := time.Now() + defer func() { + klog.Infof("Syncing exgw routes took %v", time.Since(start)) + }() + + // migration from LGW to SGW mode + // for shared gateway mode, these LRPs shouldn't exist, so delete them all + if config.Gateway.Mode == config.GatewayModeShared { + if err := c.nbClient.delAllHybridRoutePolicies(); err != nil { + klog.Errorf("Error while removing hybrid policies on moving to SGW mode, error: %v", err) + } + } else if config.Gateway.Mode == config.GatewayModeLocal { + // remove all legacy hybrid route policies + if err := c.nbClient.delAllLegacyHybridRoutePolicies(); err != nil { + klog.Errorf("Error while removing legacy hybrid policies, error: %v", err) + } + } + + // Get all ECMP routes in OVN and build cache + ovnRouteCache := c.buildOVNECMPCache() + + if len(ovnRouteCache) == 0 { + // Even if no ECMP routes exist, we should ensure no 501 LRPs exist either + if err := c.nbClient.delAllHybridRoutePolicies(); err != nil { + klog.Errorf("Error while removing hybrid policies, error: %v", err) + } + // nothing in OVN, so no reason to search for stale routes + return + } + + // Build cache of expected routes in the cluster + // map[podIP]set[podNamespacedName,nodeName,expectedGWIPs] + policyGWIPsMap, err := c.buildExternalIPGatewaysFromPolicyRules() + if err != nil { + klog.Errorf("Error while aggregating the external policy routes: %v", err) + } + + annotatedGWIPsMap, err := c.buildExternalIPGatewaysFromAnnotations() + if err != nil { + klog.Errorf("Cannot retrieve the annotated gateway IPs:%w", err) + } + + // compare caches and see if OVN routes are stale + for podIP, ovnRoutes := range ovnRouteCache { + // pod IP does not exist in the cluster + // remove route and any hybrid policy + expectedNextHopsPolicy, okPolicy := policyGWIPsMap[podIP] + expectedNextHopsAnnotation, okAnnotation := annotatedGWIPsMap[podIP] + if !okPolicy && !okAnnotation { + // No external gateways found for this Pod IP + 
continue + } + + for _, ovnRoute := range ovnRoutes { + // if length of the output port is 0, this is a legacy route (we now always specify output interface) + if len(ovnRoute.outport) == 0 { + continue + } + + node := util.GetWorkerFromGatewayRouter(ovnRoute.router) + // prefix will signify secondary exgw bridge, or empty if normal setup + // have to determine if a node changed while master was down and if the route swapped from + // the default bridge to a new secondary bridge (or vice versa) + prefix, err := c.nbClient.extSwitchPrefix(node) + if err != nil { + // we shouldn't continue in this case, because we cant be sure this is a route we want to remove + klog.Errorf("Cannot sync exgw route: %+v, unable to determine exgw switch prefix: %v", + ovnRoute, err) + } else if (prefix != "" && !strings.Contains(ovnRoute.outport, prefix)) || + (prefix == "" && strings.Contains(ovnRoute.outport, types.EgressGWSwitchPrefix)) { + continue + } + if expectedNextHopsPolicy != nil { + ovnRoute.shouldExist = c.processOVNRoute(ovnRoute, expectedNextHopsPolicy.gwList, podIP, expectedNextHopsPolicy) + if ovnRoute.shouldExist { + continue + } + } + if expectedNextHopsAnnotation != nil { + ovnRoute.shouldExist = c.processOVNRoute(ovnRoute, expectedNextHopsAnnotation.gwList, podIP, expectedNextHopsAnnotation) + } + } + } + + klog.Infof("OVN ECMP route cache is: %+v", ovnRouteCache) + klog.Infof("Cluster ECMP route cache is: %+v", policyGWIPsMap) + + // iterate through ovn routes and remove any stale entries + for podIP, ovnRoutes := range ovnRouteCache { + podHasAnyECMPRoutes := false + for _, ovnRoute := range ovnRoutes { + if !ovnRoute.shouldExist { + klog.Infof("Found stale exgw ecmp route, podIP: %s, nexthop: %s, router: %s", + podIP, ovnRoute.nextHop, ovnRoute.router) + lrsr := nbdb.LogicalRouterStaticRoute{UUID: ovnRoute.uuid} + err := c.nbClient.deleteLogicalRouterStaticRoutes(ovnRoute.router, &lrsr) + // err := + if err != nil { + klog.Errorf("Error deleting static route %s from router %s: %v", ovnRoute.uuid, ovnRoute.router, err) + } + + // check to see if we should also clean up bfd + node := util.GetWorkerFromGatewayRouter(ovnRoute.router) + // prefix will signify secondary exgw bridge, or empty if normal setup + // have to determine if a node changed while master was down and if the route swapped from + // the default bridge to a new secondary bridge (or vice versa) + prefix, err := c.nbClient.extSwitchPrefix(node) + if err != nil { + // we shouldn't continue in this case, because we cant be sure this is a route we want to remove + klog.Errorf("Cannot sync exgw bfd: %+v, unable to determine exgw switch prefix: %v", + ovnRoute, err) + } else { + if err := c.nbClient.cleanUpBFDEntry(ovnRoute.nextHop, ovnRoute.router, prefix); err != nil { + klog.Errorf("Cannot clean up BFD entry: %w", err) + } + } + + } else { + podHasAnyECMPRoutes = true + } + } + + // if pod had no ECMP routes we need to make sure we remove logical route policy for local gw mode + if !podHasAnyECMPRoutes { + for _, ovnRoute := range ovnRoutes { + gr := strings.TrimPrefix(ovnRoute.router, types.GWRouterPrefix) + if err := c.nbClient.delHybridRoutePolicyForPod(net.ParseIP(podIP), gr); err != nil { + klog.Errorf("Error while removing hybrid policy for pod IP: %s, on node: %s, error: %v", + podIP, gr, err) + } + } + } + } +} + +func (c *ExternalGatewayMasterController) buildExternalIPGatewaysFromPolicyRules() (map[string]*managedGWIPs, error) { + + clusterRouteCache := make(map[string]*managedGWIPs) + externalRoutePolicies, err := 
c.routeLister.List(labels.Everything()) + if err != nil { + return nil, err + } + + for _, policy := range externalRoutePolicies { + p, err := c.mgr.processExternalRoutePolicy(policy) + if err != nil { + return nil, err + } + // store the policy manifest in the routePolicy cache to avoid hitting the informer every time the annotation logic recalls all the gw IPs from the CRs. + err = c.mgr.storeRoutePolicyInCache(policy) + if err != nil { + return nil, err + } + nsList, err := c.mgr.listNamespacesBySelector(p.targetNamespacesSelector) + if err != nil { + return nil, err + } + allGWIPs := make(gatewayInfoList, 0) + allGWIPs = append(allGWIPs, p.staticGateways...) + for _, gw := range p.dynamicGateways { + allGWIPs = append(allGWIPs, gw) + } + for _, ns := range nsList { + nsPods, err := c.podLister.Pods(ns.Name).List(labels.Everything()) + if err != nil { + return nil, err + } + for _, nsPod := range nsPods { + // ignore completed pods, host networked pods, pods not scheduled + if util.PodWantsHostNetwork(nsPod) || util.PodCompleted(nsPod) || !util.PodScheduled(nsPod) { + continue + } + for _, podIP := range nsPod.Status.PodIPs { + podIPStr := utilnet.ParseIPSloppy(podIP.IP).String() + clusterRouteCache[podIPStr] = &managedGWIPs{namespacedName: ktypes.NamespacedName{Namespace: nsPod.Namespace, Name: nsPod.Name}, nodeName: nsPod.Spec.NodeName, gwList: make(gatewayInfoList, 0)} + for _, gwInfo := range allGWIPs { + for gw := range gwInfo.gws { + if utilnet.IsIPv6String(gw) != utilnet.IsIPv6String(podIPStr) { + continue + } + clusterRouteCache[podIPStr].gwList = append(clusterRouteCache[podIPStr].gwList, gwInfo) + } + } + } + } + } + + } + // flag the route policy cache as populated so that the logic to retrieve the dynamic and static gw IPs from the annotation side can use the cache instead of hitting the informer. + c.mgr.setRoutePolicyCacheAsPopulated() + return clusterRouteCache, nil +} + +func (c *ExternalGatewayMasterController) processOVNRoute(ovnRoute *ovnRoute, gwList gatewayInfoList, podIP string, managedIPGWInfo *managedGWIPs) bool { + // podIP exists, check if route matches + for _, gwInfo := range gwList { + for clusterNextHop := range gwInfo.gws { + if ovnRoute.nextHop == clusterNextHop { + // populate the externalGWInfo cache with this pair podIP->next Hop IP. 
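+ // If the cache update succeeds the route is kept (processOVNRoute returns
+ // true); on error we log and keep checking the remaining gateways, and the
+ // route is treated as stale if none of them matches.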
+ err := c.nbClient.updateExternalGWInfoCacheForPodIPWithGatewayIP(podIP, ovnRoute.nextHop, managedIPGWInfo.nodeName, gwInfo.bfdEnabled, managedIPGWInfo.namespacedName) + if err == nil { + return true + } + klog.Errorf("Failed to add cache routeInfo for %s, error: %v", managedIPGWInfo.namespacedName.Name, err) + } + } + } + return false +} + +func (c *ExternalGatewayMasterController) buildExternalIPGatewaysFromAnnotations() (map[string]*managedGWIPs, error) { + clusterRouteCache := make(map[string]*managedGWIPs, 0) + + nsList, err := c.namespaceLister.List(labels.Everything()) + if err != nil { + return nil, err + } + for _, ns := range nsList { + if nsGWIPs, ok := ns.Annotations[util.RoutingExternalGWsAnnotation]; ok && nsGWIPs != "" { + gwInfo := &gatewayInfo{gws: sets.New[string]()} + for _, ip := range strings.Split(nsGWIPs, ",") { + podIPStr := utilnet.ParseIPSloppy(ip).String() + gwInfo.gws.Insert(podIPStr) + } + if _, ok := ns.Annotations[util.BfdAnnotation]; ok { + gwInfo.bfdEnabled = true + } + nsPodList, err := c.podLister.Pods(ns.Name).List(labels.Everything()) + if err != nil { + return nil, err + } + // iterate through all the pods in the namespace and associate the gw ips to those that correspond + populateManagedGWIPsCacheInNamespace(ns.Name, gwInfo, clusterRouteCache, nsPodList) + } + } + + podList, err := c.podLister.List(labels.Everything()) + if err != nil { + return nil, err + } + for _, pod := range podList { + networkName, ok := pod.Annotations[util.RoutingNetworkAnnotation] + if !ok { + continue + } + targetNamespaces, ok := pod.Annotations[util.RoutingNamespaceAnnotation] + if !ok { + continue + } + foundGws, err := getExGwPodIPs(pod, networkName) + if err != nil { + klog.Errorf("Error getting exgw IPs for pod: %s, error: %v", pod.Name, err) + return nil, err + } + if foundGws.Len() == 0 { + klog.Errorf("No pod IPs found for pod %s/%s", pod.Namespace, pod.Name) + continue + } + gwInfo := &gatewayInfo{gws: foundGws} + if _, ok := pod.Annotations[util.BfdAnnotation]; ok { + gwInfo.bfdEnabled = true + } + for _, targetNs := range strings.Split(targetNamespaces, ",") { + // iterate through all pods and associate the gw ips to those that correspond + populateManagedGWIPsCacheInNamespace(targetNs, gwInfo, clusterRouteCache, podList) + } + } + return clusterRouteCache, nil +} + +func populateManagedGWIPsCacheInNamespace(targetNamespace string, gwInfo *gatewayInfo, cache map[string]*managedGWIPs, podList []*v1.Pod) { + for gwIP := range gwInfo.gws { + for _, pod := range podList { + // ignore completed pods, host networked pods, pods not scheduled + if util.PodWantsHostNetwork(pod) || util.PodCompleted(pod) || !util.PodScheduled(pod) { + continue + } + for _, podIP := range pod.Status.PodIPs { + podIPStr := utilnet.ParseIPSloppy(podIP.IP).String() + if utilnet.IsIPv6String(gwIP) != utilnet.IsIPv6String(podIPStr) { + continue + } + if _, ok := cache[podIPStr]; !ok { + cache[podIPStr] = &managedGWIPs{ + namespacedName: ktypes.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}, + nodeName: pod.Spec.NodeName, + } + } + cache[podIPStr].gwList = append(cache[podIPStr].gwList, &gatewayInfo{gws: sets.New(gwIP), bfdEnabled: gwInfo.bfdEnabled}) + } + } + } +} + +// Build cache of routes in OVN +// map[podIP][]ovnRoute +type ovnRoute struct { + nextHop string + uuid string + router string + outport string + shouldExist bool +} + +func (c *ExternalGatewayMasterController) buildOVNECMPCache() map[string][]*ovnRoute { + p := func(item *nbdb.LogicalRouterStaticRoute) bool { + return 
item.Options["ecmp_symmetric_reply"] == "true" + } + logicalRouterStaticRoutes, err := c.nbClient.findLogicalRouterStaticRoutesWithPredicate(p) + if err != nil { + klog.Errorf("CleanECMPRoutes: failed to list ecmp routes: %v", err) + return nil + } + + ovnRouteCache := make(map[string][]*ovnRoute) + for _, logicalRouterStaticRoute := range logicalRouterStaticRoutes { + p := func(item *nbdb.LogicalRouter) bool { + return util.SliceHasStringItem(item.StaticRoutes, logicalRouterStaticRoute.UUID) + } + logicalRouters, err := c.nbClient.findLogicalRoutersWithPredicate(p) + if err != nil { + klog.Errorf("CleanECMPRoutes: failed to find logical router for %s, err: %v", logicalRouterStaticRoute.UUID, err) + continue + } + + route := &ovnRoute{ + nextHop: logicalRouterStaticRoute.Nexthop, + uuid: logicalRouterStaticRoute.UUID, + router: logicalRouters[0].Name, + outport: *logicalRouterStaticRoute.OutputPort, + } + podIP, _, _ := net.ParseCIDR(logicalRouterStaticRoute.IPPrefix) + if _, ok := ovnRouteCache[podIP.String()]; !ok { + ovnRouteCache[podIP.String()] = []*ovnRoute{route} + } else { + ovnRouteCache[podIP.String()] = append(ovnRouteCache[podIP.String()], route) + } + } + return ovnRouteCache +} diff --git a/go-controller/pkg/ovn/default_network_controller.go b/go-controller/pkg/ovn/default_network_controller.go index 6018f5c75f..af825651a1 100644 --- a/go-controller/pkg/ovn/default_network_controller.go +++ b/go-controller/pkg/ovn/default_network_controller.go @@ -17,6 +17,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" + apbroutecontroller "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/apbroute" egresssvc "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/egress_services" svccontroller "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/services" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/unidling" @@ -51,8 +52,8 @@ type DefaultNetworkController struct { // cluster's east-west traffic. 
loadbalancerClusterCache map[kapi.Protocol]string - externalGWCache map[ktypes.NamespacedName]*externalRouteInfo - exGWCacheMutex sync.RWMutex + externalGWCache map[ktypes.NamespacedName]*apbroutecontroller.ExternalRouteInfo + exGWCacheMutex *sync.RWMutex // egressFirewalls is a map of namespaces and the egressFirewall attached to it egressFirewalls sync.Map @@ -93,6 +94,9 @@ type DefaultNetworkController struct { svcController *svccontroller.Controller // Controller used to handle egress services egressSvcController *egresssvc.Controller + + // Controller used to handle the admin policy based external route resources + apbExternalRouteController *apbroutecontroller.ExternalGatewayMasterController // svcFactory used to handle service related events svcFactory informers.SharedInformerFactory @@ -163,6 +167,20 @@ func newDefaultNetworkControllerCommon(cnci *CommonNetworkControllerInfo, zoneICHandler = zoneic.NewZoneInterconnectHandler(&util.DefaultNetInfo{}, cnci.nbClient, cnci.sbClient) zoneChassisHandler = zoneic.NewZoneChassisHandler(cnci.sbClient) } + apbExternalRouteController, err := apbroutecontroller.NewExternalMasterController( + DefaultNetworkControllerName, + cnci.client, + cnci.kube.APBRouteClient, + defaultStopChan, + cnci.watchFactory.PodCoreInformer(), + cnci.watchFactory.NamespaceInformer(), + cnci.watchFactory.NodeCoreInformer().Lister(), + cnci.nbClient, + addressSetFactory, + ) + if err != nil { + return nil, fmt.Errorf("unable to create new admin policy based external route controller while creating new default network controller :%w", err) + } oc := &DefaultNetworkController{ BaseNetworkController: BaseNetworkController{ @@ -181,8 +199,8 @@ func newDefaultNetworkControllerCommon(cnci *CommonNetworkControllerInfo, wg: defaultWg, localZoneNodes: &sync.Map{}, }, - externalGWCache: make(map[ktypes.NamespacedName]*externalRouteInfo), - exGWCacheMutex: sync.RWMutex{}, + externalGWCache: apbExternalRouteController.ExternalGWCache, + exGWCacheMutex: apbExternalRouteController.ExGWCacheMutex, eIPC: egressIPZoneController{ nodeIPUpdateMutex: &sync.Mutex{}, podAssignmentMutex: &sync.Mutex{}, @@ -199,6 +217,7 @@ func newDefaultNetworkControllerCommon(cnci *CommonNetworkControllerInfo, svcFactory: svcFactory, zoneICHandler: zoneICHandler, zoneChassisHandler: zoneChassisHandler, + apbExternalRouteController: apbExternalRouteController, } // Allocate IPs for logical router port "GwRouterToJoinSwitchPrefix + OVNClusterRouter". This should always @@ -383,10 +402,6 @@ func (oc *DefaultNetworkController) Run(ctx context.Context) error { klog.Infof("Starting all the Watchers...") start := time.Now() - // Sync external gateway routes. External gateway may be set in namespaces - // or via pods. 
So execute an individual sync method at startup - WithSyncDurationMetricNoError("external gateway routes", oc.cleanExGwECMPRoutes) - // WatchNamespaces() should be started first because it has no other // dependencies, and WatchNodes() depends on it if err := WithSyncDurationMetric("namespace", oc.WatchNamespaces); err != nil { @@ -497,6 +512,12 @@ func (oc *DefaultNetworkController) Run(ctx context.Context) error { }() } + oc.wg.Add(1) + go func() { + defer oc.wg.Done() + oc.apbExternalRouteController.Run(1) + }() + end := time.Since(start) klog.Infof("Completing all the Watchers took %v", end) metrics.MetricMasterSyncDuration.WithLabelValues("all watchers").Set(end.Seconds()) diff --git a/go-controller/pkg/ovn/egressgw.go b/go-controller/pkg/ovn/egressgw.go index c63735d56b..360143c71a 100644 --- a/go-controller/pkg/ovn/egressgw.go +++ b/go-controller/pkg/ovn/egressgw.go @@ -6,8 +6,6 @@ import ( "net" "regexp" "strings" - "sync" - "time" utilnet "k8s.io/utils/net" @@ -17,6 +15,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdbops" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" + apbroutecontroller "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/apbroute" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" "github.com/pkg/errors" @@ -34,26 +33,6 @@ type gatewayInfo struct { bfdEnabled bool } -// Build cache of routes in OVN -// map[podIP][]ovnRoute -type ovnRoute struct { - nextHop string - uuid string - router string - outport string - shouldExist bool -} - -type externalRouteInfo struct { - sync.Mutex - deleted bool - podName ktypes.NamespacedName - // podExternalRoutes is a cache keeping the LR routes added to the GRs when - // external gateways are used. The first map key is the podIP (src-ip of the route), - // the second the GW IP (next hop), and the third the GR name - podExternalRoutes map[string]map[string]string -} - func getHybridRouteAddrSetDbIDs(nodeName, controller string) *libovsdbops.DbObjectIDs { return libovsdbops.NewDbObjectIDs(libovsdbops.AddressSetHybridNodeRoute, controller, map[libovsdbops.ExternalIDKey]string{ @@ -63,7 +42,7 @@ func getHybridRouteAddrSetDbIDs(nodeName, controller string) *libovsdbops.DbObje } // ensureRouteInfoLocked either gets the current routeInfo in the cache with a lock, or creates+locks a new one if missing -func (oc *DefaultNetworkController) ensureRouteInfoLocked(podName ktypes.NamespacedName) (*externalRouteInfo, error) { +func (oc *DefaultNetworkController) ensureRouteInfoLocked(podName ktypes.NamespacedName) (*apbroutecontroller.ExternalRouteInfo, error) { // We don't want to hold the cache lock while we try to lock the routeInfo (unless we are creating it, then we know // no one else is using it). This could lead to dead lock. Therefore the steps here are: // 1. 
Get the cache lock, try to find the routeInfo @@ -72,9 +51,9 @@ func (oc *DefaultNetworkController) ensureRouteInfoLocked(podName ktypes.Namespa oc.exGWCacheMutex.Lock() routeInfo, ok := oc.externalGWCache[podName] if !ok { - routeInfo = &externalRouteInfo{ - podExternalRoutes: make(map[string]map[string]string), - podName: podName, + routeInfo = &apbroutecontroller.ExternalRouteInfo{ + PodExternalRoutes: make(map[string]map[string]string), + PodName: podName, } // we are creating routeInfo and going to set it in podExternalRoutes map // so safe to hold the lock while we create and add it @@ -92,7 +71,7 @@ func (oc *DefaultNetworkController) ensureRouteInfoLocked(podName ktypes.Namespa // 5. If routeInfo was deleted between releasing the cache lock and grabbing // the routeInfo lock, return an error so the caller doesn't use it and // retries the operation later - if routeInfo.deleted { + if routeInfo.Deleted { routeInfo.Unlock() return nil, fmt.Errorf("routeInfo for pod %s, was altered during ensure route info", podName) } @@ -101,11 +80,11 @@ func (oc *DefaultNetworkController) ensureRouteInfoLocked(podName ktypes.Namespa } // getRouteInfosForNamespace returns all routeInfos for a specific namespace -func (oc *DefaultNetworkController) getRouteInfosForNamespace(namespace string) []*externalRouteInfo { +func (oc *DefaultNetworkController) getRouteInfosForNamespace(namespace string) []*apbroutecontroller.ExternalRouteInfo { oc.exGWCacheMutex.RLock() defer oc.exGWCacheMutex.RUnlock() - routes := make([]*externalRouteInfo, 0) + routes := make([]*apbroutecontroller.ExternalRouteInfo, 0) for namespacedName, routeInfo := range oc.externalGWCache { if namespacedName.Namespace == namespace { routes = append(routes, routeInfo) @@ -116,7 +95,7 @@ func (oc *DefaultNetworkController) getRouteInfosForNamespace(namespace string) } // deleteRouteInfoLocked removes a routeInfo from the cache, and returns it locked -func (oc *DefaultNetworkController) deleteRouteInfoLocked(name ktypes.NamespacedName) *externalRouteInfo { +func (oc *DefaultNetworkController) deleteRouteInfoLocked(name ktypes.NamespacedName) *apbroutecontroller.ExternalRouteInfo { // Attempt to find the routeInfo in the cache, release the cache lock while // we try to lock the routeInfo to avoid any deadlock oc.exGWCacheMutex.RLock() @@ -128,12 +107,12 @@ func (oc *DefaultNetworkController) deleteRouteInfoLocked(name ktypes.Namespaced } routeInfo.Lock() - if routeInfo.deleted { + if routeInfo.Deleted { routeInfo.Unlock() return nil } - routeInfo.deleted = true + routeInfo.Deleted = true go func() { oc.exGWCacheMutex.Lock() @@ -327,7 +306,7 @@ func (oc *DefaultNetworkController) deleteLogicalRouterStaticRoute(podIP, mask, // deletePodGWRoute deletes all associated gateway routing resources for one // pod gateway route -func (oc *DefaultNetworkController) deletePodGWRoute(routeInfo *externalRouteInfo, podIP, gw, gr string) error { +func (oc *DefaultNetworkController) deletePodGWRoute(routeInfo *apbroutecontroller.ExternalRouteInfo, podIP, gw, gr string) error { if utilnet.IsIPv6String(gw) != utilnet.IsIPv6String(podIP) { return nil } @@ -335,18 +314,18 @@ func (oc *DefaultNetworkController) deletePodGWRoute(routeInfo *externalRouteInf mask := util.GetIPFullMask(podIP) if err := oc.deleteLogicalRouterStaticRoute(podIP, mask, gw, gr); err != nil { return fmt.Errorf("unable to delete pod %s ECMP route to GR %s, GW: %s: %w", - routeInfo.podName, gr, gw, err) + routeInfo.PodName, gr, gw, err) } klog.V(5).Infof("ECMP route deleted for pod: %s, on 
gr: %s, to gw: %s", - routeInfo.podName, gr, gw) + routeInfo.PodName, gr, gw) node := util.GetWorkerFromGatewayRouter(gr) // The gw is deleted from the routes cache after this func is called, length 1 // means it is the last gw for the pod and the hybrid route policy should be deleted. - if entry := routeInfo.podExternalRoutes[podIP]; len(entry) == 1 { + if entry := routeInfo.PodExternalRoutes[podIP]; len(entry) == 1 { if err := oc.delHybridRoutePolicyForPod(net.ParseIP(podIP), node); err != nil { - return fmt.Errorf("unable to delete hybrid route policy for pod %s: err: %v", routeInfo.podName, err) + return fmt.Errorf("unable to delete hybrid route policy for pod %s: err: %v", routeInfo.PodName, err) } } @@ -420,15 +399,25 @@ func (oc *DefaultNetworkController) deletePodGWRoutesForNamespace(pod *kapi.Pod, // are given, all routes for the namespace are deleted. func (oc *DefaultNetworkController) deleteGWRoutesForNamespace(namespace string, matchGWs sets.Set[string]) error { deleteAll := (matchGWs == nil || matchGWs.Len() == 0) + + policyGWIPs, err := oc.apbExternalRouteController.GetDynamicGatewayIPsForTargetNamespace(namespace) + if err != nil { + return err + } + policyStaticGWIPs, err := oc.apbExternalRouteController.GetStaticGatewayIPsForTargetNamespace(namespace) + if err != nil { + return err + } + policyGWIPs.Union(policyStaticGWIPs) for _, routeInfo := range oc.getRouteInfosForNamespace(namespace) { routeInfo.Lock() - if routeInfo.deleted { + if routeInfo.Deleted { routeInfo.Unlock() continue } - for podIP, routes := range routeInfo.podExternalRoutes { + for podIP, routes := range routeInfo.PodExternalRoutes { for gw, gr := range routes { - if deleteAll || matchGWs.Has(gw) { + if (deleteAll || matchGWs.Has(gw)) && !policyGWIPs.Has(gw) { if err := oc.deletePodGWRoute(routeInfo, podIP, gw, gr); err != nil { // if we encounter error while deleting routes for one pod; we return and don't try subsequent pods routeInfo.Unlock() @@ -453,12 +442,12 @@ func (oc *DefaultNetworkController) deleteGWRoutesForPod(name ktypes.NamespacedN for _, podIPNet := range podIPNets { podIP := podIPNet.IP.String() - routes, ok := routeInfo.podExternalRoutes[podIP] + routes, ok := routeInfo.PodExternalRoutes[podIP] if !ok { continue } if len(routes) == 0 { - delete(routeInfo.podExternalRoutes, podIP) + delete(routeInfo.PodExternalRoutes, podIP) continue } for gw, gr := range routes { @@ -498,7 +487,7 @@ func (oc *DefaultNetworkController) addGWRoutesForPod(gateways []*gatewayInfo, p podIP := podIPNet.IP.String() for _, gw := range gws { // if route was already programmed, skip it - if foundGR, ok := routeInfo.podExternalRoutes[podIP][gw]; ok && foundGR == gr { + if foundGR, ok := routeInfo.PodExternalRoutes[podIP][gw]; ok && foundGR == gr { routesAdded++ continue } @@ -507,12 +496,12 @@ func (oc *DefaultNetworkController) addGWRoutesForPod(gateways []*gatewayInfo, p if err := oc.createBFDStaticRoute(gateway.bfdEnabled, gw, podIP, gr, port, mask); err != nil { return err } - if routeInfo.podExternalRoutes[podIP] == nil { - routeInfo.podExternalRoutes[podIP] = make(map[string]string) + if routeInfo.PodExternalRoutes[podIP] == nil { + routeInfo.PodExternalRoutes[podIP] = make(map[string]string) } - routeInfo.podExternalRoutes[podIP][gw] = gr + routeInfo.PodExternalRoutes[podIP][gw] = gr routesAdded++ - if len(routeInfo.podExternalRoutes[podIP]) == 1 { + if len(routeInfo.PodExternalRoutes[podIP]) == 1 { if err := oc.addHybridRoutePolicyForPod(podIPNet.IP, node); err != nil { return err } @@ -878,135 +867,6 @@ 
func (oc *DefaultNetworkController) extSwitchPrefix(nodeName string) (string, er return "", nil } -func (oc *DefaultNetworkController) cleanExGwECMPRoutes() { - start := time.Now() - defer func() { - klog.Infof("Syncing exgw routes took %v", time.Since(start)) - }() - - // migration from LGW to SGW mode - // for shared gateway mode, these LRPs shouldn't exist, so delete them all - if config.Gateway.Mode == config.GatewayModeShared { - if err := oc.delAllHybridRoutePolicies(); err != nil { - klog.Errorf("Error while removing hybrid policies on moving to SGW mode, error: %v", err) - } - } else if config.Gateway.Mode == config.GatewayModeLocal { - // remove all legacy hybrid route policies - if err := oc.delAllLegacyHybridRoutePolicies(); err != nil { - klog.Errorf("Error while removing legacy hybrid policies, error: %v", err) - } - } - - // Get all ECMP routes in OVN and build cache - ovnRouteCache := oc.buildOVNECMPCache() - - if len(ovnRouteCache) == 0 { - // Even if no ECMP routes exist, we should ensure no 501 LRPs exist either - if err := oc.delAllHybridRoutePolicies(); err != nil { - klog.Errorf("Error while removing hybrid policies, error: %v", err) - } - // nothing in OVN, so no reason to search for stale routes - return - } - - // Build cache of expected routes in the cluster - // map[podIP][]nextHops - clusterRouteCache := make(map[string][]string) - - // Find all pods serving as exgw - oc.buildClusterECMPCacheFromPods(clusterRouteCache) - - // Get all namespaces with exgw routes specified - oc.buildClusterECMPCacheFromNamespaces(clusterRouteCache) - - // compare caches and see if OVN routes are stale - for podIP, ovnRoutes := range ovnRouteCache { - // pod IP does not exist in the cluster - // remove route and any hybrid policy - if _, ok := clusterRouteCache[podIP]; !ok { - continue - } - - // podIP exists, check if route matches - expectedNexthops := clusterRouteCache[podIP] - for _, ovnRoute := range ovnRoutes { - // if length of the output port is 0, this is a legacy route (we now always specify output interface) - if len(ovnRoute.outport) == 0 { - continue - } - - node := util.GetWorkerFromGatewayRouter(ovnRoute.router) - // prefix will signify secondary exgw bridge, or empty if normal setup - // have to determine if a node changed while master was down and if the route swapped from - // the default bridge to a new secondary bridge (or vice versa) - prefix, err := oc.extSwitchPrefix(node) - if err != nil { - // we shouldn't continue in this case, because we cant be sure this is a route we want to remove - klog.Errorf("Cannot sync exgw route: %+v, unable to determine exgw switch prefix: %v", - ovnRoute, err) - } else if (prefix != "" && !strings.Contains(ovnRoute.outport, prefix)) || - (prefix == "" && strings.Contains(ovnRoute.outport, types.EgressGWSwitchPrefix)) { - continue - } - - for _, clusterNexthop := range expectedNexthops { - if ovnRoute.nextHop == clusterNexthop { - ovnRoute.shouldExist = true - } - } - } - } - - klog.Infof("OVN ECMP route cache is: %+v", ovnRouteCache) - klog.Infof("Cluster ECMP route cache is: %+v", clusterRouteCache) - - // iterate through ovn routes and remove any stale entries - for podIP, ovnRoutes := range ovnRouteCache { - podHasAnyECMPRoutes := false - for _, ovnRoute := range ovnRoutes { - if !ovnRoute.shouldExist { - klog.Infof("Found stale exgw ecmp route, podIP: %s, nexthop: %s, router: %s", - podIP, ovnRoute.nextHop, ovnRoute.router) - lrsr := nbdb.LogicalRouterStaticRoute{UUID: ovnRoute.uuid} - err := 
libovsdbops.DeleteLogicalRouterStaticRoutes(oc.nbClient, ovnRoute.router, &lrsr) - if err != nil { - klog.Errorf("Error deleting static route %s from router %s: %v", ovnRoute.uuid, ovnRoute.router, err) - } - - // check to see if we should also clean up bfd - node := util.GetWorkerFromGatewayRouter(ovnRoute.router) - // prefix will signify secondary exgw bridge, or empty if normal setup - // have to determine if a node changed while master was down and if the route swapped from - // the default bridge to a new secondary bridge (or vice versa) - prefix, err := oc.extSwitchPrefix(node) - if err != nil { - // we shouldn't continue in this case, because we cant be sure this is a route we want to remove - klog.Errorf("Cannot sync exgw bfd: %+v, unable to determine exgw switch prefix: %v", - ovnRoute, err) - } else { - if err := oc.cleanUpBFDEntry(ovnRoute.nextHop, ovnRoute.router, prefix); err != nil { - klog.Errorf("Cannot clean up BFD entry: %w", err) - } - } - - } else { - podHasAnyECMPRoutes = true - } - } - - // if pod had no ECMP routes we need to make sure we remove logical route policy for local gw mode - if !podHasAnyECMPRoutes { - for _, ovnRoute := range ovnRoutes { - gr := strings.TrimPrefix(ovnRoute.router, types.GWRouterPrefix) - if err := oc.delHybridRoutePolicyForPod(net.ParseIP(podIP), gr); err != nil { - klog.Errorf("Error while removing hybrid policy for pod IP: %s, on node: %s, error: %v", - podIP, gr, err) - } - } - } - } -} - func getExGwPodIPs(gatewayPod *kapi.Pod) (sets.Set[string], error) { foundGws := sets.New[string]() if gatewayPod.Annotations[util.RoutingNetworkAnnotation] != "" { @@ -1041,142 +901,6 @@ func getExGwPodIPs(gatewayPod *kapi.Pod) (sets.Set[string], error) { return foundGws, nil } -func (oc *DefaultNetworkController) buildClusterECMPCacheFromNamespaces(clusterRouteCache map[string][]string) { - namespaces, err := oc.watchFactory.GetNamespaces() - if err != nil { - klog.Errorf("Error getting all namespaces for exgw ecmp route sync: %v", err) - return - } - for _, namespace := range namespaces { - if _, ok := namespace.Annotations[util.RoutingExternalGWsAnnotation]; !ok { - continue - } - // namespace has exgw routes, build cache - gwIPs, err := util.ParseRoutingExternalGWAnnotation(namespace.Annotations[util.RoutingExternalGWsAnnotation]) - if err != nil { - klog.Errorf("Unable to clean ExGw ECMP routes for namespace: %s, %v", namespace.Name, err) - continue - } - // get all pods in the namespace - nsPods, err := oc.watchFactory.GetPods(namespace.Name) - if err != nil { - klog.Errorf("Unable to clean ExGw ECMP routes for namespace: %s, %v", - namespace, err) - continue - } - for _, gwIP := range gwIPs.UnsortedList() { - for _, nsPod := range nsPods { - // ignore completed pods, host networked pods, pods not scheduled - if util.PodWantsHostNetwork(nsPod) || util.PodCompleted(nsPod) || !util.PodScheduled(nsPod) { - continue - } - for _, podIP := range nsPod.Status.PodIPs { - podIPStr := utilnet.ParseIPSloppy(podIP.IP).String() - if utilnet.IsIPv6String(gwIP) != utilnet.IsIPv6String(podIPStr) { - continue - } - if val, ok := clusterRouteCache[podIPStr]; ok { - // add gwIP to cache only if buildClusterECMPCacheFromPods hasn't already added it - gwIPexists := false - for _, existingGwIP := range val { - if existingGwIP == gwIP { - gwIPexists = true - break - } - } - if !gwIPexists { - clusterRouteCache[podIPStr] = append(clusterRouteCache[podIPStr], gwIP) - } - } else { - clusterRouteCache[podIPStr] = []string{gwIP} - } - } - } - } - } -} - -func (oc 
*DefaultNetworkController) buildClusterECMPCacheFromPods(clusterRouteCache map[string][]string) { - // Get all Pods serving as exgws - pods, err := oc.watchFactory.GetAllPods() - if err != nil { - klog.Error("Error getting all pods for exgw ecmp route sync: %v", err) - return - } - for _, pod := range pods { - podRoutingNamespaceAnno := pod.Annotations[util.RoutingNamespaceAnnotation] - if podRoutingNamespaceAnno == "" { - continue - } - // get all pods in the namespace - nsPods, err := oc.watchFactory.GetPods(podRoutingNamespaceAnno) - if err != nil { - klog.Errorf("Unable to clean ExGw ECMP routes for exgw: %s, serving namespace: %s, %v", - pod.Name, podRoutingNamespaceAnno, err) - continue - } - - // pod is serving as exgw, build cache - gwIPs, err := getExGwPodIPs(pod) - if err != nil { - klog.Errorf("Error getting exgw IPs for pod: %s, error: %v", pod.Name, err) - continue - } - for _, gwIP := range gwIPs.UnsortedList() { - for _, nsPod := range nsPods { - // ignore completed pods, host networked pods, pods not scheduled - if util.PodWantsHostNetwork(nsPod) || util.PodCompleted(nsPod) || !util.PodScheduled(nsPod) { - continue - } - for _, podIP := range nsPod.Status.PodIPs { - podIPStr := utilnet.ParseIPSloppy(podIP.IP).String() - if utilnet.IsIPv6String(gwIP) != utilnet.IsIPv6String(podIPStr) { - continue - } - clusterRouteCache[podIPStr] = append(clusterRouteCache[podIPStr], gwIP) - } - } - } - } -} - -func (oc *DefaultNetworkController) buildOVNECMPCache() map[string][]*ovnRoute { - p := func(item *nbdb.LogicalRouterStaticRoute) bool { - return item.Options["ecmp_symmetric_reply"] == "true" - } - logicalRouterStaticRoutes, err := libovsdbops.FindLogicalRouterStaticRoutesWithPredicate(oc.nbClient, p) - if err != nil { - klog.Errorf("CleanECMPRoutes: failed to list ecmp routes: %v", err) - return nil - } - - ovnRouteCache := make(map[string][]*ovnRoute) - for _, logicalRouterStaticRoute := range logicalRouterStaticRoutes { - p := func(item *nbdb.LogicalRouter) bool { - return util.SliceHasStringItem(item.StaticRoutes, logicalRouterStaticRoute.UUID) - } - logicalRouters, err := libovsdbops.FindLogicalRoutersWithPredicate(oc.nbClient, p) - if err != nil { - klog.Errorf("CleanECMPRoutes: failed to find logical router for %s, err: %v", logicalRouterStaticRoute.UUID, err) - continue - } - - route := &ovnRoute{ - nextHop: logicalRouterStaticRoute.Nexthop, - uuid: logicalRouterStaticRoute.UUID, - router: logicalRouters[0].Name, - outport: *logicalRouterStaticRoute.OutputPort, - } - podIP, _, _ := net.ParseCIDR(logicalRouterStaticRoute.IPPrefix) - if _, ok := ovnRouteCache[podIP.String()]; !ok { - ovnRouteCache[podIP.String()] = []*ovnRoute{route} - } else { - ovnRouteCache[podIP.String()] = append(ovnRouteCache[podIP.String()], route) - } - } - return ovnRouteCache -} - func makePodGWKey(pod *kapi.Pod) string { return fmt.Sprintf("%s_%s", pod.Namespace, pod.Name) } diff --git a/go-controller/pkg/ovn/external_gateway_test.go b/go-controller/pkg/ovn/external_gateway_test.go new file mode 100644 index 0000000000..fd938c27f5 --- /dev/null +++ b/go-controller/pkg/ovn/external_gateway_test.go @@ -0,0 +1,2891 @@ +package ovn + +import ( + "context" + "encoding/json" + "fmt" + "net" + "sync" + "time" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + adminpolicybasedrouteapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + adminpolicybasedrouteclientset 
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" + addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" + libovsdbtest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/libovsdb" + + ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/client-go/kubernetes" + "k8s.io/klog/v2" + + nettypes "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + "github.com/onsi/ginkgo" + "github.com/onsi/ginkgo/extensions/table" + "github.com/onsi/gomega" + "github.com/urfave/cli/v2" +) + +var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { + const ( + namespaceName = "namespace1" + ) + var ( + app *cli.App + fakeOvn *FakeOVN + + bfd1NamedUUID = "bfd-1-UUID" + bfd2NamedUUID = "bfd-2-UUID" + logicalRouterPort = "rtoe-GR_node1" + ) + + ginkgo.BeforeEach(func() { + // Restore global default values before each testcase + gomega.Expect(config.PrepareTestConfig()).To(gomega.Succeed()) + + app = cli.NewApp() + app.Name = "test" + app.Flags = config.Flags + + fakeOvn = NewFakeOVN(true) + }) + + ginkgo.AfterEach(func() { + fakeOvn.shutdown() + }) + + ginkgo.Context("on setting namespace gateway static hop", func() { + + table.DescribeTable("reconciles an new pod with namespace single exgw static GW already set", func(bfd bool, finalNB []libovsdbtest.TestData) { + app.Action = func(ctx *cli.Context) error { + + namespaceT := *newNamespace(namespaceName) + + t := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespaceT.Name, + ) + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + }, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, + }, + }, + &v1.PodList{ + Items: []v1.Pod{ + *newPod(t.namespace, t.podName, t.nodeName, t.podIP), + }, + }, + &adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteList{ + Items: []adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute{ + newPolicy("policy", &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceName}}, sets.NewString("9.0.0.1"), bfd, nil, nil, bfd, ""), + }, + }, + ) + + t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + + injectNode(fakeOvn) + err := fakeOvn.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.RunAPBExternalPolicyController() + + gomega.Eventually(func() string { return getPodAnnotations(fakeOvn.fakeClient.KubeClient, t.namespace, t.podName) }, 2).Should(gomega.MatchJSON(`{"default": {"ip_addresses":["` + t.podIP + `/24"], "mac_address":"` + t.podMAC + `", "gateway_ips": ["` + t.nodeGWIP + `"], "ip_address":"` + t.podIP + `/24", "gateway_ip": "` + t.nodeGWIP + `"}}`)) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalNB)) + return nil + } + + err := app.Run([]string{app.Name}) + 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, table.Entry("No BFD", false, []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + }), + table.Entry("BFD Enabled", true, []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.BFD{ + UUID: bfd1NamedUUID, + DstIP: "9.0.0.1", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + BFD: &bfd1NamedUUID, + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + })) + + table.DescribeTable("reconciles an new pod with namespace single exgw static gateway already set with pod event first", func(bfd bool, finalNB []libovsdbtest.TestData) { + app.Action = func(ctx *cli.Context) error { + + namespaceT := *newNamespace(namespaceName) + + t := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespaceT.Name, + ) + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + }, + }, + &v1.PodList{ + Items: []v1.Pod{ + *newPod(t.namespace, t.podName, t.nodeName, t.podIP), + }, + }, + &adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteList{ + Items: []adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute{ + newPolicy("policy", &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceName}}, sets.NewString("9.0.0.1"), bfd, nil, nil, bfd, ""), + }, + }, + ) + t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + + injectNode(fakeOvn) + err := fakeOvn.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.RunAPBExternalPolicyController() + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Namespaces().Create(context.TODO(), &namespaceT, metav1.CreateOptions{}) + 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(func() string { return getPodAnnotations(fakeOvn.fakeClient.KubeClient, t.namespace, t.podName) }, 2).Should(gomega.MatchJSON(`{"default": {"ip_addresses":["` + t.podIP + `/24"], "mac_address":"` + t.podMAC + `", "gateway_ips": ["` + t.nodeGWIP + `"], "ip_address":"` + t.podIP + `/24", "gateway_ip": "` + t.nodeGWIP + `"}}`)) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalNB)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, table.Entry("No BFD", false, []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + }), + table.Entry("BFD Enabled", true, []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.BFD{ + UUID: bfd1NamedUUID, + DstIP: "9.0.0.1", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + BFD: &bfd1NamedUUID, + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + })) + + table.DescribeTable("reconciles an new pod with namespace double exgw static gateways already set", func(bfd bool, finalNB []libovsdbtest.TestData) { + + app.Action = func(ctx *cli.Context) error { + + namespaceT := *newNamespace(namespaceName) + + t := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespaceT.Name, + ) + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + }, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, + }, + }, + &v1.PodList{ + Items: []v1.Pod{ + *newPod(t.namespace, t.podName, t.nodeName, t.podIP), + }, + }, + &adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteList{ + Items: []adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute{ + newPolicy("policy", &metav1.LabelSelector{MatchLabels: 
map[string]string{"name": namespaceName}}, sets.NewString("9.0.0.1", "9.0.0.2"), bfd, nil, nil, bfd, ""), + }, + }, + ) + t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + + injectNode(fakeOvn) + err := fakeOvn.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.RunAPBExternalPolicyController() + + gomega.Eventually(func() string { return getPodAnnotations(fakeOvn.fakeClient.KubeClient, t.namespace, t.podName) }, 2).Should(gomega.MatchJSON(`{"default": {"ip_addresses":["` + t.podIP + `/24"], "mac_address":"` + t.podMAC + `", "gateway_ips": ["` + t.nodeGWIP + `"], "ip_address":"` + t.podIP + `/24", "gateway_ip": "` + t.nodeGWIP + `"}}`)) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalNB)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + table.Entry("No BFD", false, []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-2-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.2", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID", "static-route-2-UUID"}, + }, + }), + table.Entry("BFD Enabled", true, []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.BFD{ + UUID: bfd1NamedUUID, + DstIP: "9.0.0.1", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.BFD{ + UUID: bfd2NamedUUID, + DstIP: "9.0.0.2", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + BFD: &bfd1NamedUUID, + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-2-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.2", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + BFD: &bfd2NamedUUID, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ 
+ UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID", "static-route-2-UUID"}, + }, + }), + ) + + table.DescribeTable("reconciles deleting a pod with namespace double exgw static gateway already set", + func(bfd bool, + initNB []libovsdbtest.TestData, + syncNB []libovsdbtest.TestData, + finalNB []libovsdbtest.TestData, + ) { + app.Action = func(ctx *cli.Context) error { + + namespaceT := *newNamespace(namespaceName) + + t := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespaceT.Name, + ) + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: initNB, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, + }, + }, + &v1.PodList{ + Items: []v1.Pod{ + *newPod(t.namespace, t.podName, t.nodeName, t.podIP), + }, + }, + ) + t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + + injectNode(fakeOvn) + err := fakeOvn.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.RunAPBExternalPolicyController() + + gomega.Eventually(fakeOvn.nbClient, 5).Should(libovsdbtest.HaveData(syncNB)) + gomega.Eventually(func() string { return getPodAnnotations(fakeOvn.fakeClient.KubeClient, t.namespace, t.podName) }, 2).Should(gomega.MatchJSON(`{"default": {"ip_addresses":["` + t.podIP + `/24"], "mac_address":"` + t.podMAC + `", "gateway_ips": ["` + t.nodeGWIP + `"], "ip_address":"` + t.podIP + `/24", "gateway_ip": "` + t.nodeGWIP + `"}}`)) + p := newPolicy("policy", &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceName}}, sets.NewString("9.0.0.1", "9.0.0.2"), bfd, nil, nil, bfd, "") + _, err = fakeOvn.fakeClient.AdminPolicyRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Create(context.Background(), &p, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + deletePod(t.namespace, t.podName, fakeOvn.fakeClient.KubeClient) + + gomega.Eventually(fakeOvn.nbClient, 5).Should(libovsdbtest.HaveData(finalNB)) + return nil + } + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + table.Entry("No BFD", false, + []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-2-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.2", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID", "static-route-2-UUID"}, + }, + }, + []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: 
[]string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{}, + }, + }, + []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{}, + }, + }, + ), + table.Entry("BFD", true, + []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.BFD{ + UUID: bfd1NamedUUID, + DstIP: "9.0.0.1", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.BFD{ + UUID: bfd2NamedUUID, + DstIP: "9.0.0.2", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + BFD: &bfd1NamedUUID, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-2-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.2", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + BFD: &bfd2NamedUUID, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID", "static-route-2-UUID"}, + }, + }, + []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{}, + }, + }, + []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{}, + }, + }, + ), + ) + + table.DescribeTable("reconciles deleting a pod with namespace double exgw static gateway already set IPV6", + func(bfd bool, + initNB, syncNB, finalNB []libovsdbtest.TestData) { + app.Action = func(ctx *cli.Context) error { + namespaceT := *newNamespace(namespaceName) + + t := newTPod( + "node1", + "fd00:10:244:2::0/64", + "fd00:10:244:2::2", + "fd00:10:244:2::1", + "myPod", + "fd00:10:244:2::3", + "0a:58:49:a1:93:cb", + namespaceT.Name, + ) + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: initNB, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, + }, + }, + &v1.PodList{ + Items: []v1.Pod{ + *newPod(t.namespace, t.podName, t.nodeName, t.podIP), + }, + }, + ) + config.IPv6Mode = true + t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + injectNode(fakeOvn) + err := fakeOvn.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.RunAPBExternalPolicyController() + + gomega.Eventually(fakeOvn.nbClient, 5).Should(libovsdbtest.HaveData(syncNB)) + gomega.Eventually(func() string { return getPodAnnotations(fakeOvn.fakeClient.KubeClient, t.namespace, t.podName) }, 2).Should(gomega.MatchJSON(`{"default": {"ip_addresses":["` + t.podIP + 
`/64"], "mac_address":"` + t.podMAC + `", "gateway_ips": ["` + t.nodeGWIP + `"], "ip_address":"` + t.podIP + `/64", "gateway_ip": "` + t.nodeGWIP + `"}}`)) + p := newPolicy("policy", &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceName}}, sets.NewString("fd2e:6f44:5dd8::89", "fd2e:6f44:5dd8::76"), bfd, nil, nil, bfd, "") + _, err = fakeOvn.fakeClient.AdminPolicyRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Create(context.Background(), &p, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + deletePod(t.namespace, t.podName, fakeOvn.fakeClient.KubeClient) + gomega.Eventually(fakeOvn.nbClient, 5).Should(libovsdbtest.HaveData(finalNB)) + return nil + } + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + table.Entry("BFD IPV6", true, []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "fd00:10:244:2::3/128", + BFD: &bfd1NamedUUID, + OutputPort: &logicalRouterPort, + Nexthop: "fd2e:6f44:5dd8::89", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-2-UUID", + IPPrefix: "fd00:10:244:2::3/128", + BFD: &bfd1NamedUUID, + OutputPort: &logicalRouterPort, + Nexthop: "fd2e:6f44:5dd8::76", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.BFD{ + UUID: bfd2NamedUUID, + DstIP: "fd2e:6f44:5dd8::76", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.BFD{ + UUID: bfd1NamedUUID, + DstIP: "fd2e:6f44:5dd8::89", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID", "static-route-2-UUID"}, + }, + }, + []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:49:a1:93:cb fd00:10:244:2::3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:49:a1:93:cb fd00:10:244:2::3"}, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{}, + }, + }, + []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + }, + ), + ) + + table.DescribeTable("reconciles deleting a exgw namespace with active pod", + func(bfd bool, + initNB []libovsdbtest.TestData, + finalNB []libovsdbtest.TestData, + ) { + app.Action = func(ctx *cli.Context) error { + + namespaceT := *newNamespace(namespaceName) + + t := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespaceT.Name, + ) + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: initNB, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, + }, + }, + &v1.PodList{ + Items: []v1.Pod{ + *newPod(t.namespace, t.podName, t.nodeName, t.podIP), + }, + }, + &adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteList{ + Items: []adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute{ + newPolicy("policy", &metav1.LabelSelector{MatchLabels: 
map[string]string{"name": namespaceName}}, sets.NewString("9.0.0.1", "9.0.0.2"), bfd, nil, nil, bfd, ""), + }, + }, + ) + t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + + injectNode(fakeOvn) + err := fakeOvn.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.RunAPBExternalPolicyController() + + gomega.Eventually(func() string { return getPodAnnotations(fakeOvn.fakeClient.KubeClient, t.namespace, t.podName) }, 2).Should(gomega.MatchJSON(`{"default": {"ip_addresses":["` + t.podIP + `/24"], "mac_address":"` + t.podMAC + `", "gateway_ips": ["` + t.nodeGWIP + `"], "ip_address":"` + t.podIP + `/24", "gateway_ip": "` + t.nodeGWIP + `"}}`)) + + deleteNamespace(t.namespace, fakeOvn.fakeClient.KubeClient) + gomega.Eventually(fakeOvn.nbClient, 5).Should(libovsdbtest.HaveData(finalNB)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + table.Entry("No BFD", false, + []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-2-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.2", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID", "static-route-2-UUID"}, + }, + }, + []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{}, + }, + }, + ), + table.Entry("BFD", true, + []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.BFD{ + UUID: "bfd1-UUID", + DstIP: "9.0.0.1", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.BFD{ + UUID: "bfd2-UUID", + DstIP: "9.0.0.2", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + BFD: &bfd1NamedUUID, + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-2-UUID", + IPPrefix: "10.128.1.3/32", + BFD: &bfd2NamedUUID, + Nexthop: "9.0.0.2", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID", "static-route-2-UUID"}, + }, + }, + 
[]libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{}, + }, + }, + )) + }) + + ginkgo.Context("on setting pod dynamic gateways", func() { + table.DescribeTable("reconciles a host networked pod acting as a exgw for another namespace for new pod", func(bfd bool, finalNB []libovsdbtest.TestData) { + app.Action = func(ctx *cli.Context) error { + + namespaceT := *newNamespace(namespaceName) + namespaceX := *newNamespace("namespace2") + t := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespaceT.Name, + ) + gwPod := *newPod(namespaceX.Name, "gwPod", "node2", "9.0.0.1") + gwPod.Spec.HostNetwork = true + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + }, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, namespaceX, + }, + }, + &v1.PodList{ + Items: []v1.Pod{ + gwPod, + }, + }, + &adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteList{ + Items: []adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute{ + newPolicy("policy", + &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceName}}, + nil, + bfd, + &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceX.Name}}, + &metav1.LabelSelector{MatchLabels: map[string]string{"name": gwPod.Name}}, + bfd, + ""), + }, + }, + ) + t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + injectNode(fakeOvn) + err := fakeOvn.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.RunAPBExternalPolicyController() + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(t.namespace).Create(context.TODO(), newPod(t.namespace, t.podName, t.nodeName, t.podIP), metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(func() string { return getPodAnnotations(fakeOvn.fakeClient.KubeClient, t.namespace, t.podName) }, 2).Should(gomega.MatchJSON(`{"default": {"ip_addresses":["` + t.podIP + `/24"], "mac_address":"` + t.podMAC + `", "gateway_ips": ["` + t.nodeGWIP + `"], "ip_address":"` + t.podIP + `/24", "gateway_ip": "` + t.nodeGWIP + `"}}`)) + gomega.Eventually(fakeOvn.nbClient, 5).Should(libovsdbtest.HaveData(finalNB)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, table.Entry("No BFD", false, []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + 
&nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + }), + table.Entry("BFD Enabled", true, []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.BFD{ + UUID: bfd1NamedUUID, + DstIP: "9.0.0.1", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + BFD: &bfd1NamedUUID, + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + })) + + table.DescribeTable("reconciles a host networked pod acting as a exgw for another namespace for existing pod", func(bfd bool, finalNB []libovsdbtest.TestData) { + app.Action = func(ctx *cli.Context) error { + + namespaceT := *newNamespace(namespaceName) + namespaceX := *newNamespace("namespace2") + t := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespaceT.Name, + ) + gwPod := *newPod(namespaceX.Name, "gwPod", "node2", "9.0.0.1") + gwPod.Spec.HostNetwork = true + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + }, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, namespaceX, + }, + }, + &v1.PodList{ + Items: []v1.Pod{ + *newPod(t.namespace, t.podName, t.nodeName, t.podIP), + }, + }, + &adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteList{ + Items: []adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute{ + newPolicy("policy", + &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceName}}, + nil, + bfd, + &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceX.Name}}, + &metav1.LabelSelector{MatchLabels: map[string]string{"name": gwPod.Name}}, + bfd, + ""), + }, + }, + ) + t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + injectNode(fakeOvn) + err := fakeOvn.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.RunAPBExternalPolicyController() + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(namespaceX.Name).Create(context.TODO(), &gwPod, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalNB)) + return nil + } + 
+ err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, table.Entry("No BFD", false, []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + }), + table.Entry("BFD Enabled", true, []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.BFD{ + UUID: bfd1NamedUUID, + DstIP: "9.0.0.1", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + BFD: &bfd1NamedUUID, + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + })) + + table.DescribeTable("reconciles a multus networked pod acting as a exgw for another namespace for new pod", func(bfd bool, finalNB []libovsdbtest.TestData) { + app.Action = func(ctx *cli.Context) error { + ns := nettypes.NetworkStatus{Name: "dummy", IPs: []string{"11.0.0.1"}} + networkStatuses := []nettypes.NetworkStatus{ns} + nsEncoded, err := json.Marshal(networkStatuses) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + namespaceT := *newNamespace(namespaceName) + namespaceX := *newNamespace("namespace2") + t := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespaceT.Name, + ) + gwPod := *newPod(namespaceX.Name, "gwPod", "node2", "9.0.0.1") + gwPod.Annotations = map[string]string{ + "k8s.v1.cni.cncf.io/network-status": string(nsEncoded), + } + gwPod.Spec.HostNetwork = true + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + }, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, namespaceX, + }, + }, + &v1.PodList{ + Items: []v1.Pod{ + gwPod, + }, + }, + &adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteList{ + Items: []adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute{ + newPolicy("policy", + &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceName}}, + nil, + bfd, + 
&metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceX.Name}}, + &metav1.LabelSelector{MatchLabels: map[string]string{"name": gwPod.Name}}, + bfd, + "dummy"), + }, + }, + ) + t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + injectNode(fakeOvn) + err = fakeOvn.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.RunAPBExternalPolicyController() + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(t.namespace).Create(context.TODO(), newPod(t.namespace, t.podName, t.nodeName, t.podIP), metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(func() string { return getPodAnnotations(fakeOvn.fakeClient.KubeClient, t.namespace, t.podName) }, 2).Should(gomega.MatchJSON(`{"default": {"ip_addresses":["` + t.podIP + `/24"], "mac_address":"` + t.podMAC + `", "gateway_ips": ["` + t.nodeGWIP + `"], "ip_address":"` + t.podIP + `/24", "gateway_ip": "` + t.nodeGWIP + `"}}`)) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalNB)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, table.Entry("No BFD", false, []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "11.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + }), + table.Entry("BFD Enabled", true, []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.BFD{ + UUID: bfd1NamedUUID, + DstIP: "11.0.0.1", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "11.0.0.1", + BFD: &bfd1NamedUUID, + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + })) + + table.DescribeTable("reconciles deleting a host networked pod acting as a exgw for another namespace for existing pod", + func(bfd bool, + beforeDeleteNB []libovsdbtest.TestData, + afterDeleteNB []libovsdbtest.TestData) { + app.Action = func(ctx *cli.Context) error { + + namespaceT := 
*newNamespace(namespaceName) + namespaceX := *newNamespace("namespace2") + t := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespaceT.Name, + ) + gwPod := *newPod(namespaceX.Name, "gwPod", "node2", "9.0.0.1") + gwPod.Spec.HostNetwork = true + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + }, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, namespaceX, + }, + }, + &v1.PodList{ + Items: []v1.Pod{ + *newPod(t.namespace, t.podName, t.nodeName, t.podIP), + }, + }, + &adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteList{ + Items: []adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute{ + newPolicy("policy", + &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceName}}, + nil, + bfd, + &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceX.Name}}, + &metav1.LabelSelector{MatchLabels: map[string]string{"name": gwPod.Name}}, + bfd, + "", + ), + }, + }, + ) + t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + injectNode(fakeOvn) + err := fakeOvn.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.RunAPBExternalPolicyController() + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(namespaceX.Name).Create(context.TODO(), &gwPod, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(beforeDeleteNB)) + + deletePod(gwPod.Namespace, gwPod.Name, fakeOvn.fakeClient.KubeClient) + + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(afterDeleteNB)) + gomega.Eventually(func() string { + return getNamespaceAnnotations(fakeOvn.fakeClient.KubeClient, namespaceT.Name)[util.ExternalGatewayPodIPsAnnotation] + }, 5).Should(gomega.Equal("")) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + table.Entry("No BFD", false, + []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + }, + []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 
10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{}, + }, + }, + ), + table.Entry("BFD Enabled", true, []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.BFD{ + UUID: bfd1NamedUUID, + DstIP: "9.0.0.1", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + BFD: &bfd1NamedUUID, + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + }, + []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{}, + }, + }, + ), + ) + }) + ginkgo.Context("on using bfd", func() { + ginkgo.It("should enable bfd only on the namespace gw when set", func() { + app.Action = func(ctx *cli.Context) error { + + namespaceT := *newNamespace(namespaceName) + namespaceX := *newNamespace("namespace2") + + t := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespaceT.Name, + ) + gwPod := *newPod(namespaceX.Name, "gwPod", "node2", "10.0.0.1") + gwPod.Spec.HostNetwork = true + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + }, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, namespaceX, + }, + }, + &v1.PodList{ + Items: []v1.Pod{ + *newPod(t.namespace, t.podName, t.nodeName, t.podIP), + }, + }, + &adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteList{ + Items: []adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute{ + newPolicy("policy", + &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceT.Name}}, + sets.NewString("9.0.0.1"), + true, + &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceX.Name}}, + &metav1.LabelSelector{MatchLabels: map[string]string{"name": gwPod.Name}}, + false, + "", + ), + }, + }, + ) + t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + + injectNode(fakeOvn) + err := fakeOvn.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchPods() + 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.RunAPBExternalPolicyController() + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(namespaceX.Name).Create(context.TODO(), &gwPod, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + finalNB := []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.BFD{ + UUID: bfd1NamedUUID, + DstIP: "9.0.0.1", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + BFD: &bfd1NamedUUID, + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-2-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "10.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID", "static-route-2-UUID"}, + }, + } + gomega.Eventually(fakeOvn.nbClient, 5).Should(libovsdbtest.HaveData(finalNB)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + ginkgo.It("should enable bfd only on the gw pod when set", func() { + app.Action = func(ctx *cli.Context) error { + + namespaceT := *newNamespace(namespaceName) + namespaceX := *newNamespace("namespace2") + + t := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespaceT.Name, + ) + gwPod := *newPod(namespaceX.Name, "gwPod", "node2", "10.0.0.1") + gwPod.Spec.HostNetwork = true + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + }, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, namespaceX, + }, + }, + &v1.PodList{ + Items: []v1.Pod{ + *newPod(t.namespace, t.podName, t.nodeName, t.podIP), + }, + }, + &adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteList{ + Items: []adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute{ + newPolicy("policy", + &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceT.Name}}, + sets.NewString("9.0.0.1"), + false, + &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceX.Name}}, + &metav1.LabelSelector{MatchLabels: map[string]string{"name": gwPod.Name}}, + true, + "", + ), + }, + }, + ) + t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + + injectNode(fakeOvn) + err := fakeOvn.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.RunAPBExternalPolicyController() + + _, err = 
fakeOvn.fakeClient.KubeClient.CoreV1().Pods(namespaceX.Name).Create(context.TODO(), &gwPod, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + finalNB := []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.BFD{ + UUID: bfd1NamedUUID, + DstIP: "10.0.0.1", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-2-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "10.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + BFD: &bfd1NamedUUID, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID", "static-route-2-UUID"}, + }, + } + + gomega.Eventually(fakeOvn.nbClient, 5).Should(libovsdbtest.HaveData(finalNB)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + ginkgo.It("should disable bfd when removing the static hop from the namespace", func() { + app.Action = func(ctx *cli.Context) error { + namespaceT := *newNamespace(namespaceName) + + t := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespaceT.Name, + ) + initNB := libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.BFD{ + UUID: bfd1NamedUUID, + DstIP: "9.0.0.1", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + BFD: &bfd1NamedUUID, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + }, + } + fakeOvn.startWithDBSetup( + initNB, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, + }, + }, + &v1.PodList{ + Items: []v1.Pod{ + *newPod(t.namespace, t.podName, t.nodeName, t.podIP), + }, + }, + ) + t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + + injectNode(fakeOvn) + err := fakeOvn.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.RunAPBExternalPolicyController() + + gomega.Eventually(fakeOvn.nbClient, 5).Should(libovsdbtest.HaveData([]libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + 
"pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{}, + }, + })) + + p := newPolicy("policy", + &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceT.Name}}, + sets.NewString("9.0.0.1"), + true, + nil, + nil, + false, + "") + _, err = fakeOvn.fakeClient.AdminPolicyRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Create(context.Background(), &p, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + tempNB := []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.BFD{ + UUID: bfd1NamedUUID, + DstIP: "9.0.0.1", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + BFD: &bfd1NamedUUID, + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + } + gomega.Eventually(fakeOvn.nbClient, 5).Should(libovsdbtest.HaveData(tempNB)) + + updatePolicy("policy", + &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceT.Name}}, + sets.NewString("9.0.0.1"), + false, + nil, + nil, + false, + "", + fakeOvn.fakeClient.AdminPolicyRouteClient, + ) + + finalNB := []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + } + + gomega.Eventually(fakeOvn.nbClient, 5).Should(libovsdbtest.HaveData(finalNB)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) + ginkgo.Context("hybrid route policy operations in lgw mode", func() { + ginkgo.It("add hybrid route policy for pods", func() { + app.Action = func(ctx *cli.Context) error { + config.Gateway.Mode = config.GatewayModeLocal + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + 
"node1" + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1", + Networks: []string{"100.64.0.4/32"}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + }, + }, + ) + + fakeOvn.RunAPBExternalPolicyController() + + asIndex := getHybridRouteAddrSetDbIDs("node1", DefaultNetworkControllerName) + asv4, _ := addressset.GetHashNamesForAS(asIndex) + finalNB := []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + UUID: "2a7a61cb-fb13-4266-a3f0-9ac5c4471123 [u2596996164]", + Priority: ovntypes.HybridOverlayReroutePriority, + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.0.4"}, + Match: "inport == \"rtos-node1\" && ip4.src == $" + asv4 + " && ip4.dst != 10.128.0.0/14", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"2a7a61cb-fb13-4266-a3f0-9ac5c4471123 [u2596996164]"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1" + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1", + Networks: []string{"100.64.0.4/32"}, + }, + } + + err := fakeOvn.controller.addHybridRoutePolicyForPod(net.ParseIP("10.128.1.3"), "node1") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(fakeOvn.nbClient, 5).Should(libovsdbtest.HaveData(finalNB)) + // check if the address-set was created with the podIP + dbIDs := getHybridRouteAddrSetDbIDs("node1", DefaultNetworkControllerName) + fakeOvn.asf.ExpectAddressSetWithIPs(dbIDs, []string{"10.128.1.3"}) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + ginkgo.It("should reconcile a pod and create/delete the hybridRoutePolicy accordingly", func() { + app.Action = func(ctx *cli.Context) error { + config.Gateway.Mode = config.GatewayModeLocal + + namespaceT := *newNamespace("namespace1") + namespaceT.Annotations = map[string]string{"k8s.ovn.org/routing-external-gws": "9.0.0.1"} + t := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespaceT.Name, + ) + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1" + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1", + Networks: []string{"100.64.0.4/32"}, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + }, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, + }, + }, + &v1.PodList{ + Items: []v1.Pod{ + *newPod(t.namespace, t.podName, t.nodeName, t.podIP), + }, + }, + &adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteList{ + Items: []adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute{ + newPolicy("policy", + &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceT.Name}}, + sets.NewString("9.0.0.1"), + true, + nil, + nil, + false, + "", + ), + }, + }, + ) + + t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + + injectNode(fakeOvn) + err := fakeOvn.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) 
+ err = fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.RunAPBExternalPolicyController() + + asIndex := getHybridRouteAddrSetDbIDs("node1", DefaultNetworkControllerName) + asv4, _ := addressset.GetHashNamesForAS(asIndex) + nbWithLRP := []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + UUID: "lrp1", + Action: "reroute", + Match: "inport == \"rtos-node1\" && ip4.src == $" + asv4 + " && ip4.dst != 10.128.0.0/14", + Nexthops: []string{"100.64.0.4"}, + Priority: ovntypes.HybridOverlayReroutePriority, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1" + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1", + Networks: []string{"100.64.0.4/32"}, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + OutputPort: &logicalRouterPort, + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + }, + &nbdb.LogicalSwitch{ + UUID: "493c61b4-2f97-446d-a1f0-1f713b510bbf", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": "namespace1", + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "requested-chassis": "node1", + "iface-id-ver": "myPod", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalRouter{ + UUID: "e496b76e-18a1-461e-a919-6dcf0b3c35db", + Name: "ovn_cluster_router", + Policies: []string{"lrp1"}, + }, + &nbdb.LogicalRouter{ + UUID: "8945d2c1-bf8a-43ab-aa9f-6130eb525682", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + } + + gomega.Eventually(func() string { return getPodAnnotations(fakeOvn.fakeClient.KubeClient, t.namespace, t.podName) }, 2).Should(gomega.MatchJSON(`{"default": {"ip_addresses":["` + t.podIP + `/24"], "mac_address":"` + t.podMAC + `", "gateway_ips": ["` + t.nodeGWIP + `"], "ip_address":"` + t.podIP + `/24", "gateway_ip": "` + t.nodeGWIP + `"}}`)) + gomega.Eventually(fakeOvn.nbClient, 5).Should(libovsdbtest.HaveData(nbWithLRP)) + + deletePod(t.namespace, t.podName, fakeOvn.fakeClient.KubeClient) + + finalNB := []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1" + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1", + Networks: []string{"100.64.0.4/32"}, + }, + &nbdb.LogicalSwitch{ + UUID: "493c61b4-2f97-446d-a1f0-1f713b510bbf", + Name: "node1", + }, + &nbdb.LogicalRouter{ + UUID: "e496b76e-18a1-461e-a919-6dcf0b3c35db", + Name: "ovn_cluster_router", + }, + &nbdb.LogicalRouter{ + UUID: "8945d2c1-bf8a-43ab-aa9f-6130eb525682", + Name: "GR_node1", + }, + } + gomega.Eventually(fakeOvn.nbClient, 5).Should(libovsdbtest.HaveData(finalNB)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + ginkgo.It("should create a single policy for concurrent addHybridRoutePolicy for the same node", func() { + app.Action = func(ctx *cli.Context) error { + config.Gateway.Mode = config.GatewayModeLocal + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1" + "-UUID", + Name: 
ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1", + Networks: []string{"100.64.0.4/32"}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + }, + }, + ) + fakeOvn.RunAPBExternalPolicyController() + + asIndex := getHybridRouteAddrSetDbIDs("node1", DefaultNetworkControllerName) + asv4, _ := addressset.GetHashNamesForAS(asIndex) + finalNB := []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + UUID: "lrp1", + Priority: ovntypes.HybridOverlayReroutePriority, + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.0.4"}, + Match: "inport == \"rtos-node1\" && ip4.src == $" + asv4 + " && ip4.dst != 10.128.0.0/14", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"lrp1"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1" + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1", + Networks: []string{"100.64.0.4/32"}, + }, + } + + wg := &sync.WaitGroup{} + c := make(chan int) + for i := 1; i <= 5; i++ { + podIndex := i + wg.Add(1) + go func() { + defer wg.Done() + <-c + fakeOvn.controller.addHybridRoutePolicyForPod(net.ParseIP(fmt.Sprintf("10.128.1.%d", podIndex)), "node1") + }() + } + close(c) + wg.Wait() + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalNB)) + + err := fakeOvn.controller.addHybridRoutePolicyForPod(net.ParseIP(fmt.Sprintf("10.128.1.%d", 6)), "node1") + // adding another pod after the initial burst should not trigger an error or change db + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalNB)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + ginkgo.It("delete hybrid route policy for pods", func() { + app.Action = func(ctx *cli.Context) error { + config.Gateway.Mode = config.GatewayModeLocal + asIndex := getHybridRouteAddrSetDbIDs("node1", DefaultNetworkControllerName) + asv4, _ := addressset.GetHashNamesForAS(asIndex) + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + UUID: "2a7a61cb-fb13-4266-a3f0-9ac5c4471123 [u2596996164]", + Priority: ovntypes.HybridOverlayReroutePriority, + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.0.4"}, + Match: "inport == \"rtos-node1\" && ip4.src == $" + asv4 + " && ip4.dst != 10.128.0.0/14", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"2a7a61cb-fb13-4266-a3f0-9ac5c4471123 [u2596996164]"}, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1" + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1", + Networks: []string{"100.64.0.4/32"}, + }, + }, + }, + ) + finalNB := []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{}, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1" + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + 
ovntypes.GWRouterPrefix + "node1", + Networks: []string{"100.64.0.4/32"}, + }, + } + + injectNode(fakeOvn) + fakeOvn.RunAPBExternalPolicyController() + err := fakeOvn.controller.delHybridRoutePolicyForPod(net.ParseIP("10.128.1.3"), "node1") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalNB)) + dbIDs := getHybridRouteAddrSetDbIDs("node1", DefaultNetworkControllerName) + fakeOvn.asf.EventuallyExpectNoAddressSet(dbIDs) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + ginkgo.It("delete hybrid route policy for pods with force", func() { + app.Action = func(ctx *cli.Context) error { + config.Gateway.Mode = config.GatewayModeShared + asIndex1 := getHybridRouteAddrSetDbIDs("node1", DefaultNetworkControllerName) + as1v4, _ := addressset.GetHashNamesForAS(asIndex1) + asIndex2 := getHybridRouteAddrSetDbIDs("node2", DefaultNetworkControllerName) + as2v4, _ := addressset.GetHashNamesForAS(asIndex2) + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + UUID: "501-1st-UUID", + Priority: ovntypes.HybridOverlayReroutePriority, + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.0.4"}, + Match: "inport == \"rtos-node1\" && ip4.src == $" + as1v4 + " && ip4.dst != 10.128.0.0/14", + }, + &nbdb.LogicalRouterPolicy{ + UUID: "501-2nd-UUID", + Priority: ovntypes.HybridOverlayReroutePriority, + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.1.4"}, + Match: "inport == \"rtos-node2\" && ip4.src == $" + as2v4 + " && ip4.dst != 10.128.0.0/14", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"501-1st-UUID", "501-2nd-UUID"}, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1" + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1", + Networks: []string{"100.64.0.4/32"}, + }, + }, + }, + ) + + fakeOvn.RunAPBExternalPolicyController() + + finalNB := []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{}, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1" + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1", + Networks: []string{"100.64.0.4/32"}, + }, + } + + err := fakeOvn.controller.delAllHybridRoutePolicies() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalNB)) + dbIDs := getHybridRouteAddrSetDbIDs("node1", DefaultNetworkControllerName) + fakeOvn.asf.EventuallyExpectNoAddressSet(dbIDs) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + ginkgo.It("delete legacy hybrid route policies", func() { + app.Action = func(ctx *cli.Context) error { + config.Gateway.Mode = config.GatewayModeLocal + asIndex := getHybridRouteAddrSetDbIDs("node1", DefaultNetworkControllerName) + asv4, _ := addressset.GetHashNamesForAS(asIndex) + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + 
&nbdb.LogicalRouterPolicy{ + UUID: "501-1st-UUID", + Priority: ovntypes.HybridOverlayReroutePriority, + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.0.4"}, + Match: "inport == \"rtos-node1\" && ip4.src == 1.3.3.7 && ip4.dst != 10.128.0.0/14", + }, + &nbdb.LogicalRouterPolicy{ + UUID: "501-2nd-UUID", + Priority: ovntypes.HybridOverlayReroutePriority, + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.1.4"}, + Match: "inport == \"rtos-node2\" && ip4.src == 1.3.3.8 && ip4.dst != 10.128.0.0/14", + }, + &nbdb.LogicalRouterPolicy{ + UUID: "501-new-UUID", + Priority: ovntypes.HybridOverlayReroutePriority, + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.1.4"}, + Match: "inport == \"rtos-node2\" && ip4.src == $" + asv4 + " && ip4.dst != 10.128.0.0/14", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"501-1st-UUID", "501-2nd-UUID", "501-new-UUID"}, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1" + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1", + Networks: []string{"100.64.0.4/32"}, + }, + }, + }, + ) + + fakeOvn.RunAPBExternalPolicyController() + + finalNB := []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + UUID: "501-new-UUID", + Priority: ovntypes.HybridOverlayReroutePriority, + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.1.4"}, + Match: "inport == \"rtos-node2\" && ip4.src == $" + asv4 + " && ip4.dst != 10.128.0.0/14", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"501-new-UUID"}, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1" + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1", + Networks: []string{"100.64.0.4/32"}, + }, + } + + err := fakeOvn.controller.delAllLegacyHybridRoutePolicies() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalNB)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) + ginkgo.Context("SNAT on gateway router operations", func() { + ginkgo.It("add/delete SNAT per pod on gateway router", func() { + app.Action = func(ctx *cli.Context) error { + config.Gateway.Mode = config.GatewayModeShared + config.Gateway.DisableSNATMultipleGWs = true + + nodeName := "node1" + namespaceT := *newNamespace(namespaceName) + t := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespaceT.Name, + ) + + pod := []v1.Pod{ + *newPod(t.namespace, t.podName, t.nodeName, t.podIP), + } + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + nodeName + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + nodeName, + Networks: []string{"100.64.0.4/32"}, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + nodeName, + UUID: types.GWRouterPrefix + nodeName + "-UUID", + }, + &nbdb.LogicalSwitch{ + 
UUID: "node1", + Name: "node1", + }, + }, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, + }, + }, + &v1.PodList{ + Items: pod, + }, + ) + + finalNB := []libovsdbtest.TestData{ + &nbdb.NAT{ + UUID: "nat-UUID", + ExternalIP: "169.254.33.2", + LogicalIP: "10.128.1.3", + Options: map[string]string{"stateless": "false"}, + Type: nbdb.NATTypeSNAT, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + nodeName, + UUID: types.GWRouterPrefix + nodeName + "-UUID", + Nat: []string{"nat-UUID"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + nodeName + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + nodeName, + Networks: []string{"100.64.0.4/32"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + } + injectNode(fakeOvn) + err := fakeOvn.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeOvn.RunAPBExternalPolicyController() + + extIPs, err := getExternalIPsGR(fakeOvn.controller.watchFactory, pod[0].Spec.NodeName) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, fullMaskPodNet, _ := net.ParseCIDR("10.128.1.3/32") + gomega.Expect( + addOrUpdatePodSNAT(fakeOvn.controller.nbClient, pod[0].Spec.NodeName, extIPs, []*net.IPNet{fullMaskPodNet}), + ).To(gomega.Succeed()) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalNB)) + finalNB = []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + nodeName, + UUID: types.GWRouterPrefix + nodeName + "-UUID", + Nat: []string{}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + nodeName + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + nodeName, + Networks: []string{"100.64.0.4/32"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + } + err = deletePodSNAT(fakeOvn.controller.nbClient, nodeName, extIPs, []*net.IPNet{fullMaskPodNet}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalNB)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) +}) + +func newPolicy(policyName string, fromNSSelector *metav1.LabelSelector, staticHopsGWIPs sets.String, bfdStatic bool, dynamicHopsNSSelector *metav1.LabelSelector, dynamicHopsPodSelector *metav1.LabelSelector, bfdDynamic bool, networkAttachementName string) adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute { + p := adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute{ + ObjectMeta: metav1.ObjectMeta{Name: policyName}, + Spec: adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteSpec{ + From: adminpolicybasedrouteapi.ExternalNetworkSource{ + NamespaceSelector: *fromNSSelector, + }, + NextHops: adminpolicybasedrouteapi.ExternalNextHops{}, + }, + } + + if staticHopsGWIPs.Len() > 0 { + p.Spec.NextHops.StaticHops = []*adminpolicybasedrouteapi.StaticHop{} + for ip := range staticHopsGWIPs { + p.Spec.NextHops.StaticHops = append(p.Spec.NextHops.StaticHops, &adminpolicybasedrouteapi.StaticHop{IP: ip, BFDEnabled: bfdStatic}) + } + } + if dynamicHopsNSSelector != nil && dynamicHopsPodSelector != nil { + p.Spec.NextHops.DynamicHops = []*adminpolicybasedrouteapi.DynamicHop{ + {NamespaceSelector: dynamicHopsNSSelector, + PodSelector: *dynamicHopsPodSelector, + NetworkAttachmentName: 
networkAttachementName, + BFDEnabled: bfdDynamic}, + } + } + return p +} + +func updatePolicy(policyName string, fromNSSelector *metav1.LabelSelector, staticHopsGWIPs sets.String, bfdStatic bool, dynamicHopsNSSelector *metav1.LabelSelector, dynamicHopsPodSelector *metav1.LabelSelector, bfdDynamic bool, networkAttachementName string, fakeRouteClient adminpolicybasedrouteclientset.Interface) { + + p, err := fakeRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Get(context.TODO(), policyName, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + p.Generation++ + p.Spec.From.NamespaceSelector = *fromNSSelector + + p.Spec.NextHops.StaticHops = []*adminpolicybasedrouteapi.StaticHop{} + if staticHopsGWIPs.Len() > 0 { + for ip := range staticHopsGWIPs { + p.Spec.NextHops.StaticHops = append(p.Spec.NextHops.StaticHops, &adminpolicybasedrouteapi.StaticHop{IP: ip, BFDEnabled: bfdStatic}) + } + } + p.Spec.NextHops.DynamicHops = []*adminpolicybasedrouteapi.DynamicHop{} + if dynamicHopsNSSelector != nil && dynamicHopsPodSelector != nil { + p.Spec.NextHops.DynamicHops = append(p.Spec.NextHops.DynamicHops, + &adminpolicybasedrouteapi.DynamicHop{ + NamespaceSelector: dynamicHopsNSSelector, + PodSelector: *dynamicHopsPodSelector, + NetworkAttachmentName: networkAttachementName, + BFDEnabled: bfdDynamic}, + ) + } + _, err = fakeRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Update(context.Background(), p, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) +} + +func deletePod(namespace, name string, fakeClient kubernetes.Interface) { + + p, err := fakeClient.CoreV1().Pods(namespace).Get(context.TODO(), name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + p.Generation++ + p.DeletionTimestamp = &metav1.Time{Time: time.Now()} + _, err = fakeClient.CoreV1().Pods(namespace).Update(context.Background(), p, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeClient.CoreV1().Pods(namespace).Delete(context.Background(), p.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) +} + +func deleteNamespace(namespaceName string, fakeClient kubernetes.Interface) { + + ns, err := fakeClient.CoreV1().Namespaces().Get(context.TODO(), namespaceName, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ns.Generation++ + ns.DeletionTimestamp = &metav1.Time{Time: time.Now()} + _, err = fakeClient.CoreV1().Namespaces().Update(context.Background(), ns, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeClient.CoreV1().Namespaces().Delete(context.Background(), namespaceName, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) +} + +func (o *FakeOVN) RunAPBExternalPolicyController() { + klog.Warningf("#### [%p] INIT Admin Policy Based External Controller", o) + o.controller.wg.Add(1) + go func() { + defer o.controller.wg.Done() + o.controller.apbExternalRouteController.Run(5) + }() +} diff --git a/go-controller/pkg/ovn/ovn_test.go b/go-controller/pkg/ovn/ovn_test.go index d6089023a3..3fbcd470e2 100644 --- a/go-controller/pkg/ovn/ovn_test.go +++ b/go-controller/pkg/ovn/ovn_test.go @@ -15,6 +15,7 @@ import ( libovsdbclient "github.com/ovn-org/libovsdb/client" ovncnitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + adminpolicybasedroutefake 
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake" egressfirewall "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1" egressfirewallfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1/apis/clientset/versioned/fake" egressip "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1" @@ -24,6 +25,8 @@ import ( egressqosfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressqos/v1/apis/clientset/versioned/fake" egressservice "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1" egressservicefake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/fake" + + adminpolicybasedrouteapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics" @@ -101,24 +104,26 @@ func (o *FakeOVN) start(objects ...runtime.Object) { egressQoSObjects := []runtime.Object{} multiNetworkPolicyObjects := []runtime.Object{} egressServiceObjects := []runtime.Object{} + apbExternalRouteObjects := []runtime.Object{} v1Objects := []runtime.Object{} - nads := []*nettypes.NetworkAttachmentDefinition{} + nads := []nettypes.NetworkAttachmentDefinition{} for _, object := range objects { - if _, isEgressIPObject := object.(*egressip.EgressIPList); isEgressIPObject { + switch o := object.(type) { + case *egressip.EgressIPList: egressIPObjects = append(egressIPObjects, object) - } else if _, isEgressFirewallObject := object.(*egressfirewall.EgressFirewallList); isEgressFirewallObject { + case *egressfirewall.EgressFirewallList: egressFirewallObjects = append(egressFirewallObjects, object) - } else if _, isEgressQoSObject := object.(*egressqos.EgressQoSList); isEgressQoSObject { + case *egressqos.EgressQoSList: egressQoSObjects = append(egressQoSObjects, object) - } else if _, isMultiNetworkPolicyObject := object.(*mnpapi.MultiNetworkPolicyList); isMultiNetworkPolicyObject { + case *mnpapi.MultiNetworkPolicyList: multiNetworkPolicyObjects = append(multiNetworkPolicyObjects, object) - } else if nadList, isNADObject := object.(*nettypes.NetworkAttachmentDefinitionList); isNADObject { - for i := range nadList.Items { - nads = append(nads, &nadList.Items[i]) - } - } else if _, isEgressServiceObject := object.(*egressservice.EgressServiceList); isEgressServiceObject { + case *egressservice.EgressServiceList: egressServiceObjects = append(egressServiceObjects, object) - } else { + case *nettypes.NetworkAttachmentDefinitionList: + nads = append(nads, o.Items...) 
+ case *adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteList: + apbExternalRouteObjects = append(apbExternalRouteObjects, object) + default: v1Objects = append(v1Objects, object) } } @@ -129,6 +134,7 @@ func (o *FakeOVN) start(objects ...runtime.Object) { EgressQoSClient: egressqosfake.NewSimpleClientset(egressQoSObjects...), MultiNetworkPolicyClient: mnpfake.NewSimpleClientset(multiNetworkPolicyObjects...), EgressServiceClient: egressservicefake.NewSimpleClientset(egressServiceObjects...), + AdminPolicyRouteClient: adminpolicybasedroutefake.NewSimpleClientset(apbExternalRouteObjects...), } o.init(nads) } @@ -147,7 +153,7 @@ func (o *FakeOVN) shutdown() { o.nbsbCleanup.Cleanup() } -func (o *FakeOVN) init(nadList []*nettypes.NetworkAttachmentDefinition) { +func (o *FakeOVN) init(nadList []nettypes.NetworkAttachmentDefinition) { var err error o.watcher, err = factory.NewMasterWatchFactory(o.fakeClient) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -169,7 +175,7 @@ func (o *FakeOVN) init(nadList []*nettypes.NetworkAttachmentDefinition) { o.controller.routerLoadBalancerGroupUUID = types.ClusterRouterLBGroupName + "-UUID" for _, nad := range nadList { - err := o.NewSecondaryNetworkController(nad) + err := o.NewSecondaryNetworkController(&nad) gomega.Expect(err).NotTo(gomega.HaveOccurred()) } @@ -227,6 +233,7 @@ func NewOvnController(ovnClient *util.OVNMasterClientset, wf *factory.WatchFacto EIPClient: ovnClient.EgressIPClient, EgressFirewallClient: ovnClient.EgressFirewallClient, EgressServiceClient: ovnClient.EgressServiceClient, + APBRouteClient: ovnClient.AdminPolicyRouteClient, }, wf, recorder, diff --git a/go-controller/pkg/util/kube.go b/go-controller/pkg/util/kube.go index 0aead0e5dc..753617db3f 100644 --- a/go-controller/pkg/util/kube.go +++ b/go-controller/pkg/util/kube.go @@ -31,6 +31,7 @@ import ( networkattchmentdefclientset "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/clientset/versioned" ocpcloudnetworkclientset "github.com/openshift/client-go/cloudnetwork/clientset/versioned" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + adminpolicybasedrouteclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned" egressfirewallclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1/apis/clientset/versioned" egressipclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1/apis/clientset/versioned" egressqosclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressqos/v1/apis/clientset/versioned" @@ -48,6 +49,7 @@ type OVNClientset struct { NetworkAttchDefClient networkattchmentdefclientset.Interface MultiNetworkPolicyClient multinetworkpolicyclientset.Interface EgressServiceClient egressserviceclientset.Interface + AdminPolicyRouteClient adminpolicybasedrouteclientset.Interface } // OVNMasterClientset @@ -58,11 +60,13 @@ type OVNMasterClientset struct { EgressQoSClient egressqosclientset.Interface MultiNetworkPolicyClient multinetworkpolicyclientset.Interface EgressServiceClient egressserviceclientset.Interface + AdminPolicyRouteClient adminpolicybasedrouteclientset.Interface } type OVNNodeClientset struct { - KubeClient kubernetes.Interface - EgressServiceClient egressserviceclientset.Interface + KubeClient kubernetes.Interface + EgressServiceClient egressserviceclientset.Interface + AdminPolicyRouteClient adminpolicybasedrouteclientset.Interface } type OVNClusterManagerClientset struct { @@ -80,6 +84,7 @@ 
func (cs *OVNClientset) GetMasterClientset() *OVNMasterClientset { EgressQoSClient: cs.EgressQoSClient, MultiNetworkPolicyClient: cs.MultiNetworkPolicyClient, EgressServiceClient: cs.EgressServiceClient, + AdminPolicyRouteClient: cs.AdminPolicyRouteClient, } } @@ -94,8 +99,9 @@ func (cs *OVNClientset) GetClusterManagerClientset() *OVNClusterManagerClientset func (cs *OVNClientset) GetNodeClientset() *OVNNodeClientset { return &OVNNodeClientset{ - KubeClient: cs.KubeClient, - EgressServiceClient: cs.EgressServiceClient, + KubeClient: cs.KubeClient, + EgressServiceClient: cs.EgressServiceClient, + AdminPolicyRouteClient: cs.AdminPolicyRouteClient, } } @@ -220,6 +226,11 @@ func NewOVNClientset(conf *config.KubernetesConfig) (*OVNClientset, error) { return nil, err } + adminPolicyBasedRouteClientset, err := adminpolicybasedrouteclientset.NewForConfig(kconfig) + if err != nil { + return nil, err + } + return &OVNClientset{ KubeClient: kclientset, EgressIPClient: egressIPClientset, @@ -229,6 +240,7 @@ func NewOVNClientset(conf *config.KubernetesConfig) (*OVNClientset, error) { NetworkAttchDefClient: networkAttchmntDefClientset, MultiNetworkPolicyClient: multiNetworkPolicyClientset, EgressServiceClient: egressserviceClientset, + AdminPolicyRouteClient: adminPolicyBasedRouteClientset, }, nil } diff --git a/go-controller/pkg/util/net_linux.go b/go-controller/pkg/util/net_linux.go index ddcda76d4b..c0f9648982 100644 --- a/go-controller/pkg/util/net_linux.go +++ b/go-controller/pkg/util/net_linux.go @@ -428,7 +428,6 @@ func DeleteConntrack(ip string, port int32, protocol kapi.Protocol, ipFilterType return fmt.Errorf("could not add label %s to conntrack filter: %v", labels, err) } } - if ipAddress.To4() != nil { if _, err := netLinkOps.ConntrackDeleteFilter(netlink.ConntrackTable, netlink.FAMILY_V4, filter); err != nil { return err diff --git a/test/e2e/e2e.go b/test/e2e/e2e.go index 3d0ac0e39b..979db0f941 100644 --- a/test/e2e/e2e.go +++ b/test/e2e/e2e.go @@ -266,8 +266,8 @@ func createGenericPod(f *framework.Framework, podName, nodeSelector, namespace s } // Create a pod on the specified node using the agnostic host image -func createGenericPodWithLabel(f *framework.Framework, podName, nodeSelector, namespace string, command []string, labels map[string]string) (*v1.Pod, error) { - return createPod(f, podName, nodeSelector, namespace, command, labels) +func createGenericPodWithLabel(f *framework.Framework, podName, nodeSelector, namespace string, command []string, labels map[string]string, options ...func(*v1.Pod)) (*v1.Pod, error) { + return createPod(f, podName, nodeSelector, namespace, command, labels, options...) 
} func createServiceForPodsWithLabel(f *framework.Framework, namespace string, servicePort int32, targetPort string, serviceType string, labels map[string]string) (string, error) { @@ -328,6 +328,11 @@ func deleteClusterExternalContainer(containerName string) { if err != nil { framework.Failf("failed to delete external test container, err: %v", err) } + gomega.Eventually(func() string { + output, err := runCommand(containerRuntime, "ps", "-f", fmt.Sprintf("name=%s", containerName), "-q") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return output + }, 5).Should(gomega.HaveLen(0)) } func updateNamespace(f *framework.Framework, namespace *v1.Namespace) { @@ -341,7 +346,7 @@ func getNamespace(f *framework.Framework, name string) *v1.Namespace { } func updatePod(f *framework.Framework, pod *v1.Pod) { - _, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Update(context.Background(), pod, metav1.UpdateOptions{}) + _, err := f.ClientSet.CoreV1().Pods(pod.Namespace).Update(context.Background(), pod, metav1.UpdateOptions{}) framework.ExpectNoError(err, fmt.Sprintf("unable to update pod: %s, err: %v", pod.Name, err)) } func getPod(f *framework.Framework, podName string) *v1.Pod { diff --git a/test/e2e/external_gateways.go b/test/e2e/external_gateways.go index 82d46504a9..d75b7bc645 100644 --- a/test/e2e/external_gateways.go +++ b/test/e2e/external_gateways.go @@ -1,6 +1,8 @@ package e2e import ( + "context" + "encoding/json" "fmt" "net" "os" @@ -10,12 +12,16 @@ import ( "time" "github.com/google/go-cmp/cmp" + nettypes "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" "github.com/onsi/ginkgo" ginkgotable "github.com/onsi/ginkgo/extensions/table" "github.com/onsi/gomega" v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" + "k8s.io/klog" "k8s.io/kubernetes/test/e2e/framework" e2enode "k8s.io/kubernetes/test/e2e/framework/node" "k8s.io/kubernetes/test/e2e/framework/skipper" @@ -28,6 +34,7 @@ const ( srcHTTPPort = 80 srcUDPPort = 90 externalGatewayPodIPsAnnotation = "k8s.ovn.org/external-gw-pod-ips" + defaultPolicyName = "default-route-policy" ) var externalContainerNetwork = "kind" @@ -63,713 +70,1850 @@ type gatewayTestIPs struct { targetIPs []string } -// Validate pods can reach a network running in a container's looback address via -// an external gateway running on eth0 of the container without any tunnel encap. -// The traffic will get proxied through an annotated pod in the serving namespace. -var _ = ginkgo.Describe("e2e non-vxlan external gateway through a gateway pod", func() { - const ( - svcname string = "externalgw-pod-novxlan" - gwContainer1 string = "ex-gw-container1" - gwContainer2 string = "ex-gw-container2" - srcPingPodName string = "e2e-exgw-src-ping-pod" - gatewayPodName1 string = "e2e-gateway-pod1" - gatewayPodName2 string = "e2e-gateway-pod2" - externalTCPPort = 91 - externalUDPPort = 90 - ecmpRetry int = 20 - testTimeout string = "20" - ) +var _ = ginkgo.Describe("External Gateway test suite", func() { + + var _ = ginkgo.Context("With annotations", func() { + + // Validate pods can reach a network running in a container's looback address via + // an external gateway running on eth0 of the container without any tunnel encap. + // The traffic will get proxied through an annotated pod in the serving namespace. 
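The annotation-driven tests below steer a namespace's egress through external gateways via the namespace annotation k8s.ovn.org/routing-external-gws (the same key the suite later resets and, in one test, checks directly on namespaces). A minimal sketch of that mechanism, assuming only the kubectl plumbing the surrounding tests already use; the helper name here is illustrative, not the suite's actual annotateNamespaceForGateway:

// Point a namespace's egress at the given next-hop IPs; kubectl is driven
// through the e2e framework exactly as the surrounding tests do.
func annotateNamespaceForGatewaySketch(namespace string, ips ...string) {
	framework.RunKubectlOrDie(namespace, "annotate", "namespace", namespace,
		fmt.Sprintf("k8s.ovn.org/routing-external-gws=%s", strings.Join(ips, ",")),
		"--overwrite")
}

The gateway-pod variant instead annotates the serving pod with the namespaces it routes for; that key is not shown in this hunk.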
+ var _ = ginkgo.Describe("e2e non-vxlan external gateway through a gateway pod", func() { + const ( + svcname string = "externalgw-pod-novxlan" + gwContainer1 string = "ex-gw-container1" + gwContainer2 string = "ex-gw-container2" + srcPingPodName string = "e2e-exgw-src-ping-pod" + gatewayPodName1 string = "e2e-gateway-pod1" + gatewayPodName2 string = "e2e-gateway-pod2" + externalTCPPort = 91 + externalUDPPort = 90 + ecmpRetry int = 20 + testTimeout string = "20" + ) + + var ( + sleepCommand = []string{"bash", "-c", "sleep 20000"} + addressesv4, addressesv6 gatewayTestIPs + clientSet kubernetes.Interface + servingNamespace string + ) + + var ( + gwContainers []string + ) + + f := wrappedTestFramework(svcname) + + ginkgo.BeforeEach(func() { + clientSet = f.ClientSet // so it can be used in AfterEach + // retrieve worker node names + nodes, err := e2enode.GetBoundedReadySchedulableNodes(f.ClientSet, 3) + framework.ExpectNoError(err) + if len(nodes.Items) < 3 { + framework.Failf( + "Test requires >= 3 Ready nodes, but there are only %v nodes", + len(nodes.Items)) + } - var ( - sleepCommand = []string{"bash", "-c", "sleep 20000"} - addressesv4, addressesv6 gatewayTestIPs - clientSet kubernetes.Interface - servingNamespace string - ) + ns, err := f.CreateNamespace("exgw-serving", nil) + framework.ExpectNoError(err) + servingNamespace = ns.Name - var ( - gwContainers []string - ) + gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPingPodName, externalUDPPort, externalTCPPort, ecmpRetry) + setupAnnotatedGatewayPods(f, nodes, gatewayPodName1, gatewayPodName2, servingNamespace, sleepCommand, addressesv4, addressesv6, false) + }) - f := wrappedTestFramework(svcname) + ginkgo.AfterEach(func() { + cleanExGWContainers(clientSet, []string{gwContainer1, gwContainer2}, addressesv4, addressesv6) + resetGatewayAnnotations(f) + }) - ginkgo.BeforeEach(func() { - clientSet = f.ClientSet // so it can be used in AfterEach - // retrieve worker node names - nodes, err := e2enode.GetBoundedReadySchedulableNodes(f.ClientSet, 3) - framework.ExpectNoError(err) - if len(nodes.Items) < 3 { - framework.Failf( - "Test requires >= 3 Ready nodes, but there are only %v nodes", - len(nodes.Items)) - } + ginkgotable.DescribeTable("Should validate ICMP connectivity to an external gateway's loopback address via a pod with external gateway CR", + func(addresses *gatewayTestIPs, icmpCommand string) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } - ns, err := f.CreateNamespace("exgw-serving", nil) - framework.ExpectNoError(err) - servingNamespace = ns.Name + ginkgo.By(fmt.Sprintf("Verifying connectivity to the pod [%s] from external gateways", addresses.srcPodIP)) + for _, gwContainer := range gwContainers { + _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) + framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + } - gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPingPodName, externalUDPPort, externalTCPPort, ecmpRetry) - setupGatewayPods(f, nodes, gatewayPodName1, gatewayPodName2, servingNamespace, sleepCommand, addressesv4, addressesv6, false) - }) + tcpDumpSync := sync.WaitGroup{} + tcpDumpSync.Add(len(gwContainers)) - ginkgo.AfterEach(func() { - cleanExGWContainers(clientSet, 
[]string{gwContainer1, gwContainer2}, addressesv4, addressesv6) - }) + for _, gwContainer := range gwContainers { + go checkPingOnContainer(gwContainer, srcPingPodName, icmpCommand, &tcpDumpSync) + } - ginkgotable.DescribeTable("Should validate ICMP connectivity to an external gateway's loopback address via a pod with external gateway annotations enabled", - func(addresses *gatewayTestIPs, icmpCommand string) { - if addresses.srcPodIP == "" || addresses.nodeIP == "" { - skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) - } + pingSync := sync.WaitGroup{} + // Verify the external gateway loopback address running on the external container is reachable and + // that traffic from the source ping pod is proxied through the pod in the serving namespace + ginkgo.By("Verifying connectivity via the gateway namespace to the remote addresses") + for _, t := range addresses.targetIPs { + pingSync.Add(1) + go func(target string) { + defer ginkgo.GinkgoRecover() + defer pingSync.Done() + _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c", testTimeout, target) + framework.ExpectNoError(err, "Failed to ping remote gateway %s from pod %s", target, srcPingPodName) + }(t) + } + pingSync.Wait() + tcpDumpSync.Wait() + }, + ginkgotable.Entry("ipv4", &addressesv4, "icmp"), + ginkgotable.Entry("ipv6", &addressesv6, "icmp6")) + + ginkgotable.DescribeTable("Should validate TCP/UDP connectivity to an external gateway's loopback address via a pod with external gateway annotations enabled", + func(protocol string, addresses *gatewayTestIPs, destPort, destPortOnPod int) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } - ginkgo.By(fmt.Sprintf("Verifying connectivity to the pod [%s] from external gateways", addresses.srcPodIP)) - for _, gwContainer := range gwContainers { - _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) - framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) - } + for _, container := range gwContainers { + reachPodFromContainer(addresses.srcPodIP, strconv.Itoa(destPortOnPod), srcPingPodName, container, protocol) + } - tcpDumpSync := sync.WaitGroup{} - tcpDumpSync.Add(len(gwContainers)) + expectedHostNames := make(map[string]struct{}) + for _, c := range gwContainers { + res, err := runCommand(containerRuntime, "exec", c, "hostname") + framework.ExpectNoError(err, "failed to run hostname in %s", c) + hostname := strings.TrimSuffix(res, "\n") + framework.Logf("Hostname for %s is %s", c, hostname) + expectedHostNames[hostname] = struct{}{} + } + framework.Logf("Expected hostnames are %v", expectedHostNames) - for _, gwContainer := range gwContainers { - go checkPingOnContainer(gwContainer, srcPingPodName, icmpCommand, &tcpDumpSync) - } + ginkgo.By("Checking that external ips are reachable with both gateways") + returnedHostNames := make(map[string]struct{}) + target := addresses.targetIPs[0] + success := false + for i := 0; i < 20; i++ { + args := []string{"exec", srcPingPodName, "--"} + if protocol == "tcp" { + args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 %s %d", target, destPort)) + } else { + args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 -u %s %d", target, destPort)) + } + res, err := framework.RunKubectl(f.Namespace.Name, 
args...) + framework.ExpectNoError(err, "failed to reach %s (%s)", target, protocol) + hostname := strings.TrimSuffix(res, "\n") + if hostname != "" { + returnedHostNames[hostname] = struct{}{} + } - pingSync := sync.WaitGroup{} - // Verify the external gateway loopback address running on the external container is reachable and - // that traffic from the source ping pod is proxied through the pod in the serving namespace - ginkgo.By("Verifying connectivity via the gateway namespace to the remote addresses") - for _, t := range addresses.targetIPs { - pingSync.Add(1) - go func(target string) { - defer ginkgo.GinkgoRecover() - defer pingSync.Done() - _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c", testTimeout, target) - framework.ExpectNoError(err, "Failed to ping remote gateway %s from pod %s", target, srcPingPodName) - }(t) - } - pingSync.Wait() - tcpDumpSync.Wait() - }, - ginkgotable.Entry("ipv4", &addressesv4, "icmp"), - ginkgotable.Entry("ipv6", &addressesv6, "icmp6")) - - ginkgotable.DescribeTable("Should validate TCP/UDP connectivity to an external gateway's loopback address via a pod with external gateway annotations enabled", - func(protocol string, addresses *gatewayTestIPs, destPort, destPortOnPod int) { - if addresses.srcPodIP == "" || addresses.nodeIP == "" { - skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) - } + if cmp.Equal(returnedHostNames, expectedHostNames) { + success = true + break + } + } + framework.Logf("Received hostnames for protocol %s are %v ", protocol, returnedHostNames) - for _, container := range gwContainers { - reachPodFromContainer(addresses.srcPodIP, strconv.Itoa(destPortOnPod), srcPingPodName, container, protocol) - } + if !success { + framework.Failf("Failed to hit all the external gateways via for protocol %s, diff %s", protocol, cmp.Diff(expectedHostNames, returnedHostNames)) + } - expectedHostNames := make(map[string]struct{}) - for _, c := range gwContainers { - res, err := runCommand(containerRuntime, "exec", c, "hostname") - framework.ExpectNoError(err, "failed to run hostname in %s", c) - hostname := strings.TrimSuffix(res, "\n") - framework.Logf("Hostname for %s is %s", c, hostname) - expectedHostNames[hostname] = struct{}{} - } - framework.Logf("Expected hostnames are %v", expectedHostNames) - - ginkgo.By("Checking that external ips are reachable with both gateways") - returnedHostNames := make(map[string]struct{}) - target := addresses.targetIPs[0] - success := false - for i := 0; i < 20; i++ { - args := []string{"exec", srcPingPodName, "--"} - if protocol == "tcp" { - args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 %s %d", target, destPort)) + }, + ginkgotable.Entry("UDP ipv4", "udp", &addressesv4, externalUDPPort, srcUDPPort), + ginkgotable.Entry("TCP ipv4", "tcp", &addressesv4, externalTCPPort, srcHTTPPort), + ginkgotable.Entry("UDP ipv6", "udp", &addressesv6, externalUDPPort, srcUDPPort), + ginkgotable.Entry("TCP ipv6", "tcp", &addressesv6, externalTCPPort, srcHTTPPort)) + }) + + // Validate pods can reach a network running in multiple container's loopback + // addresses via two external gateways running on eth0 of the container without + // any tunnel encap. This test defines two external gateways and validates ECMP + // functionality to the container loopbacks. 
To verify traffic reaches the
+ // gateways, tcpdump is running on the external gateways and will exit successfully
+ // once an ICMP packet is received from the annotated pod in the k8s cluster.
+ // Two additional gateways are added to verify the tcp / udp protocols.
+ // They run the netexec command, and the pod asks to return their hostname.
+ // The test checks that both hostnames are collected at least once.
+ var _ = ginkgo.Describe("e2e multiple external gateway validation", func() {
+ const (
+ svcname string = "novxlan-externalgw-ecmp"
+ gwContainer1 string = "gw-test-container1"
+ gwContainer2 string = "gw-test-container2"
+ testTimeout string = "30"
+ ecmpRetry int = 20
+ srcPodName = "e2e-exgw-src-pod"
+ externalTCPPort = 80
+ externalUDPPort = 90
+ )
+
+ f := wrappedTestFramework(svcname)
+
+ var gwContainers []string
+ var addressesv4, addressesv6 gatewayTestIPs
+
+ ginkgo.BeforeEach(func() {
+ // retrieve worker node names
+ nodes, err := e2enode.GetBoundedReadySchedulableNodes(f.ClientSet, 3)
+ framework.ExpectNoError(err)
+ if len(nodes.Items) < 3 {
+ framework.Failf(
+ "Test requires >= 3 Ready nodes, but there are only %v nodes",
+ len(nodes.Items))
+ }
+
+ if externalContainerNetwork == "host" {
+ skipper.Skipf("Skipping as host network doesn't support multiple external gateways")
+ }
+
+ gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPodName, externalUDPPort, externalTCPPort, ecmpRetry)
+
+ })
+
+ ginkgo.AfterEach(func() {
+ // tear down the containers simulating the gateways
+ deleteClusterExternalContainer(gwContainer1)
+ deleteClusterExternalContainer(gwContainer2)
+ resetGatewayAnnotations(f)
+ })
+
+ ginkgotable.DescribeTable("Should validate ICMP connectivity to multiple external gateways for an ECMP scenario", func(addresses *gatewayTestIPs, icmpToDump string) {
+ if addresses.srcPodIP == "" || addresses.nodeIP == "" {
+ skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP)
+ }
+
+ annotateNamespaceForGateway(f.Namespace.Name, false, addresses.gatewayIPs[:]...)
+
+ ginkgo.By("Verifying connectivity to the pod from external gateways")
+ for _, gwContainer := range gwContainers {
+ _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP)
+ framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer)
+ }
+
+ ginkgo.By("Verifying connectivity to the pod from external gateways with large packets > pod MTU")
+ for _, gwContainer := range gwContainers {
+ _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-s", "1420", "-c", testTimeout, addresses.srcPodIP)
+ framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer)
+ }
+
+ // Verify the gateways and remote loopback addresses are reachable from the pod.
+ // Iterate checking connectivity to the loopbacks on the gateways until tcpdump sees
+ // the traffic or 20 attempts fail. The odds of a false negative here are ~ (1/2)^20.
+ ginkgo.By("Verifying ecmp connectivity to the external gateways by iterating through the targets")
+
+ // Check for egress traffic to both gateway loopback addresses using tcpdump, since
+ // /proc/net/dev counters only record the ingress interface traffic is received on.
+ // The test waits until an ICMP packet is matched on the gateways, or fails if a
+ // packet to the loopback is not received within the timer interval.
+ // If an ICMP packet is never detected, the error is returned via the specified channel.
+
+ tcpDumpSync := sync.WaitGroup{}
+ tcpDumpSync.Add(len(gwContainers))
+ for _, gwContainer := range gwContainers {
+ go checkPingOnContainer(gwContainer, srcPodName, icmpToDump, &tcpDumpSync)
+ }
+
+ pingSync := sync.WaitGroup{}
+
+ // spawn goroutines to ping the gateway loopbacks on both containers via
+ // ECMP, asynchronously to speed up the test.
+ for _, address := range addresses.targetIPs {
+ pingSync.Add(1)
+ go func(target string) {
+ defer ginkgo.GinkgoRecover()
+ defer pingSync.Done()
+ _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPodName, "--", "ping", "-c", testTimeout, target)
+ if err != nil {
+ framework.Logf("error generating a ping from the test pod %s: %v", srcPodName, err)
+ }
+ }(address)
+ }
+ pingSync.Wait()
+ tcpDumpSync.Wait()
+
+ }, ginkgotable.Entry("IPV4", &addressesv4, "icmp"),
+ ginkgotable.Entry("IPV6", &addressesv6, "icmp6"))
+
+ // This test runs a listener on the external container, returning the host name both on tcp and udp.
+ // The src pod tries to hit the remote address until both the containers are hit.
+ ginkgotable.DescribeTable("Should validate TCP/UDP connectivity to multiple external gateways for a UDP / TCP scenario", func(addresses *gatewayTestIPs, protocol string, destPort, destPortOnPod int) {
+ if addresses.srcPodIP == "" || addresses.nodeIP == "" {
+ skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP)
+ }
+
+ annotateNamespaceForGateway(f.Namespace.Name, false, addresses.gatewayIPs[:]...)
+
+ for _, container := range gwContainers {
+ reachPodFromContainer(addresses.srcPodIP, strconv.Itoa(destPortOnPod), srcPodName, container, protocol)
+ }
+
+ expectedHostNames := hostNamesForContainers(gwContainers)
+ framework.Logf("Expected hostnames are %v", expectedHostNames)
+
+ returnedHostNames := make(map[string]struct{})
+ success := false
+
+ // Picking only the first address, the one the udp listener is set for
+ target := addresses.targetIPs[0]
+ for i := 0; i < 20; i++ {
+ hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, target, destPort)
+ if hostname != "" {
+ returnedHostNames[hostname] = struct{}{}
+ }
+ if cmp.Equal(returnedHostNames, expectedHostNames) {
+ success = true
+ break
+ }
+ }
+
+ framework.Logf("Received hostnames for protocol %s are %v ", protocol, returnedHostNames)
+
+ if !success {
+ framework.Failf("Failed to hit all the external gateways for protocol %s, diff %s", protocol, cmp.Diff(expectedHostNames, returnedHostNames))
+ }
+
+ }, ginkgotable.Entry("IPV4 udp", &addressesv4, "udp", externalUDPPort, srcUDPPort),
+ ginkgotable.Entry("IPV4 tcp", &addressesv4, "tcp", externalTCPPort, srcHTTPPort),
+ ginkgotable.Entry("IPV6 udp", &addressesv6, "udp", externalUDPPort, srcUDPPort),
+ ginkgotable.Entry("IPV6 tcp", &addressesv6, "tcp", externalTCPPort, srcHTTPPort))
+ })
+
+ var _ = ginkgo.Describe("e2e multiple external gateway stale conntrack entry deletion validation", func() {
+ const (
+ svcname string = "novxlan-externalgw-ecmp"
+ gwContainer1 string = "gw-test-container1"
+ gwContainer2 string = "gw-test-container2"
+ srcPodName string = "e2e-exgw-src-pod"
+ gatewayPodName1 string = "e2e-gateway-pod1"
+ gatewayPodName2 string = "e2e-gateway-pod2"
+ )
+
+ var (
+ servingNamespace string
+ )
+
+ f := wrappedTestFramework(svcname)
+
+ var (
+ addressesv4, addressesv6 gatewayTestIPs
+ sleepCommand []string
+ nodes *v1.NodeList
+
err error + clientSet kubernetes.Interface + ) + + ginkgo.BeforeEach(func() { + clientSet = f.ClientSet // so it can be used in AfterEach + // retrieve worker node names + nodes, err = e2enode.GetBoundedReadySchedulableNodes(clientSet, 3) + framework.ExpectNoError(err) + if len(nodes.Items) < 3 { + framework.Failf( + "Test requires >= 3 Ready nodes, but there are only %v nodes", + len(nodes.Items)) + } + + if externalContainerNetwork == "host" { + skipper.Skipf("Skipping as host network doesn't support multiple external gateways") + } + + ns, err := f.CreateNamespace("exgw-conntrack-serving", nil) + framework.ExpectNoError(err) + servingNamespace = ns.Name + + addressesv4, addressesv6 = setupGatewayContainersForConntrackTest(f, nodes, gwContainer1, gwContainer2, srcPodName) + sleepCommand = []string{"bash", "-c", "sleep 20000"} + _, err = createGenericPod(f, gatewayPodName1, nodes.Items[0].Name, servingNamespace, sleepCommand) + framework.ExpectNoError(err, "Create and annotate the external gw pods to manage the src app pod namespace, failed: %v", err) + _, err = createGenericPod(f, gatewayPodName2, nodes.Items[1].Name, servingNamespace, sleepCommand) + framework.ExpectNoError(err, "Create and annotate the external gw pods to manage the src app pod namespace, failed: %v", err) + }) + + ginkgo.AfterEach(func() { + // tear down the containers and pods simulating the gateways + ginkgo.By("Deleting the gateway containers") + deleteClusterExternalContainer(gwContainer1) + deleteClusterExternalContainer(gwContainer2) + resetGatewayAnnotations(f) + }) + + ginkgotable.DescribeTable("Namespace annotation: Should validate conntrack entry deletion for TCP/UDP traffic via multiple external gateways a.k.a ECMP routes", func(addresses *gatewayTestIPs, protocol string) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + ginkgo.By("Annotate the app namespace to get managed by external gateways") + annotateNamespaceForGateway(f.Namespace.Name, false, addresses.gatewayIPs...) + + setupIperf3Client := func(container, address string, port int) { + // note iperf3 even when using udp also spawns tcp connection first; so we indirectly also have the tcp connection when using "-u" flag + cmd := []string{containerRuntime, "exec", container, "iperf3", "-u", "-c", address, "-p", fmt.Sprintf("%d", port), "-b", "1M", "-i", "1", "-t", "3", "&"} + _, err := runCommand(cmd...) + framework.ExpectNoError(err, "failed to setup iperf3 client for %s", container) + } + macAddressGW := make([]string, 2) + for i, containerName := range []string{gwContainer1, gwContainer2} { + ginkgo.By("Start iperf3 client from external container to connect to iperf3 server running at the src pod") + setupIperf3Client(containerName, addresses.srcPodIP, 5201+i) + macAddressExtGW, err := net.ParseMAC(getMACAddressesForNetwork(containerName, externalContainerNetwork)) + framework.ExpectNoError(err, "failed to parse MAC address for %s", containerName) + // Trim leading 0s because conntrack dumped labels are just integers + // in hex without leading 0s. 
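+ // For example, gateway MAC 02:42:ac:14:00:02 becomes "0242ac140002" after the
+ // Replace below and "242ac140002" after TrimLeft, matching the dumped label.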
+ macAddressGW[i] = strings.TrimLeft(strings.Replace(macAddressExtGW.String(), ":", "", -1), "0") + } + + ginkgo.By("Check if conntrack entries for ECMP routes are created for the 2 external gateways") + nodeName := getPod(f, srcPodName).Spec.NodeName + podConnEntriesWithMACLabelsSet := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) + gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(2)) + totalPodConnEntries := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil) + gomega.Expect(totalPodConnEntries).To(gomega.Equal(6)) // total conntrack entries for this pod/protocol + + ginkgo.By("Remove second external gateway IP from the app namespace annotation") + annotateNamespaceForGateway(f.Namespace.Name, false, addresses.gatewayIPs[0]) + + ginkgo.By("Check if conntrack entries for ECMP routes are removed for the deleted external gateway if traffic is UDP") + podConnEntriesWithMACLabelsSet = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) + totalPodConnEntries = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil) + if protocol == "udp" { + gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(1)) // we still have the conntrack entry for the remaining gateway + gomega.Expect(totalPodConnEntries).To(gomega.Equal(5)) // 6-1 } else { - args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 -u %s %d", target, destPort)) + gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(2)) + gomega.Expect(totalPodConnEntries).To(gomega.Equal(6)) + } + + ginkgo.By("Remove first external gateway IP from the app namespace annotation") + annotateNamespaceForGateway(f.Namespace.Name, false, "") + + ginkgo.By("Check if conntrack entries for ECMP routes are removed for the deleted external gateway if traffic is UDP") + podConnEntriesWithMACLabelsSet = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) + totalPodConnEntries = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil) + if protocol == "udp" { + gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(0)) // we don't have any remaining gateways left + gomega.Expect(totalPodConnEntries).To(gomega.Equal(4)) // 6-2 + } else { + gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(2)) + gomega.Expect(totalPodConnEntries).To(gomega.Equal(6)) + } + + }, + ginkgotable.Entry("IPV4 udp", &addressesv4, "udp"), + ginkgotable.Entry("IPV4 tcp", &addressesv4, "tcp"), + ginkgotable.Entry("IPV6 udp", &addressesv6, "udp"), + ginkgotable.Entry("IPV6 tcp", &addressesv6, "tcp")) + + ginkgotable.DescribeTable("ExternalGWPod annotation: Should validate conntrack entry deletion for TCP/UDP traffic via multiple external gateways a.k.a ECMP routes", func(addresses *gatewayTestIPs, protocol string) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) } - res, err := framework.RunKubectl(f.Namespace.Name, args...) 
- framework.ExpectNoError(err, "failed to reach %s (%s)", target, protocol) - hostname := strings.TrimSuffix(res, "\n") - if hostname != "" { - returnedHostNames[hostname] = struct{}{} + ginkgo.By("Annotate the external gw pods to manage the src app pod namespace") + for i, gwPod := range []string{gatewayPodName1, gatewayPodName2} { + networkIPs := fmt.Sprintf("\"%s\"", addresses.gatewayIPs[i]) + if addresses.srcPodIP != "" && addresses.nodeIP != "" { + networkIPs = fmt.Sprintf("\"%s\", \"%s\"", addresses.gatewayIPs[i], addresses.gatewayIPs[i]) + } + annotatePodForGateway(gwPod, servingNamespace, f.Namespace.Name, networkIPs, false) } - if cmp.Equal(returnedHostNames, expectedHostNames) { - success = true - break + // ensure the conntrack deletion tracker annotation is updated + ginkgo.By("Check if the k8s.ovn.org/external-gw-pod-ips got updated for the app namespace") + err := wait.PollImmediate(retryInterval, retryTimeout, func() (bool, error) { + ns := getNamespace(f, f.Namespace.Name) + return (ns.Annotations[externalGatewayPodIPsAnnotation] == fmt.Sprintf("%s,%s", addresses.gatewayIPs[0], addresses.gatewayIPs[1])), nil + }) + framework.ExpectNoError(err, "Check if the k8s.ovn.org/external-gw-pod-ips got updated, failed: %v", err) + + setupIperf3Client := func(container, address string, port int) { + // note iperf3 even when using udp also spawns tcp connection first; so we indirectly also have the tcp connection when using "-u" flag + cmd := []string{containerRuntime, "exec", container, "iperf3", "-u", "-c", address, "-p", fmt.Sprintf("%d", port), "-b", "1M", "-i", "1", "-t", "3", "&"} + _, err := runCommand(cmd...) + framework.ExpectNoError(err, "failed to setup iperf3 client for %s", container) + } + macAddressGW := make([]string, 2) + for i, containerName := range []string{gwContainer1, gwContainer2} { + ginkgo.By("Start iperf3 client from external container to connect to iperf3 server running at the src pod") + setupIperf3Client(containerName, addresses.srcPodIP, 5201+i) + macAddressExtGW, err := net.ParseMAC(getMACAddressesForNetwork(containerName, externalContainerNetwork)) + framework.ExpectNoError(err, "failed to parse MAC address for %s", containerName) + // Trim leading 0s because conntrack dumped labels are just integers + // in hex without leading 0s. 
+ macAddressGW[i] = strings.TrimLeft(strings.Replace(macAddressExtGW.String(), ":", "", -1), "0") } - } - framework.Logf("Received hostnames for protocol %s are %v ", protocol, returnedHostNames) - if !success { - framework.Failf("Failed to hit all the external gateways via for protocol %s, diff %s", protocol, cmp.Diff(expectedHostNames, returnedHostNames)) - } + ginkgo.By("Check if conntrack entries for ECMP routes are created for the 2 external gateways") + nodeName := getPod(f, srcPodName).Spec.NodeName + podConnEntriesWithMACLabelsSet := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) + gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(2)) + totalPodConnEntries := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil) + gomega.Expect(totalPodConnEntries).To(gomega.Equal(6)) // total conntrack entries for this pod/protocol + + ginkgo.By("Remove second external gateway pod's routing-namespace annotation") + annotatePodForGateway(gatewayPodName2, servingNamespace, "", addresses.gatewayIPs[1], false) + + // ensure the conntrack deletion tracker annotation is updated + ginkgo.By("Check if the k8s.ovn.org/external-gw-pod-ips got updated for the app namespace") + err = wait.PollImmediate(retryInterval, retryTimeout, func() (bool, error) { + ns := getNamespace(f, f.Namespace.Name) + return (ns.Annotations[externalGatewayPodIPsAnnotation] == addresses.gatewayIPs[0]), nil + }) + framework.ExpectNoError(err, "Check if the k8s.ovn.org/external-gw-pod-ips got updated, failed: %v", err) + + ginkgo.By("Check if conntrack entries for ECMP routes are removed for the deleted external gateway if traffic is UDP") + podConnEntriesWithMACLabelsSet = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) + totalPodConnEntries = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil) + if protocol == "udp" { + gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(1)) // we still have the conntrack entry for the remaining gateway + gomega.Expect(totalPodConnEntries).To(gomega.Equal(5)) // 6-1 + } else { + gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(2)) + gomega.Expect(totalPodConnEntries).To(gomega.Equal(6)) + } - }, - ginkgotable.Entry("UDP ipv4", "udp", &addressesv4, externalUDPPort, srcUDPPort), - ginkgotable.Entry("TCP ipv4", "tcp", &addressesv4, externalTCPPort, srcHTTPPort), - ginkgotable.Entry("UDP ipv6", "udp", &addressesv6, externalUDPPort, srcUDPPort), - ginkgotable.Entry("TCP ipv6", "tcp", &addressesv6, externalTCPPort, srcHTTPPort)) -}) + ginkgo.By("Remove first external gateway pod's routing-namespace annotation") + annotatePodForGateway(gatewayPodName1, servingNamespace, "", addresses.gatewayIPs[0], false) + + // ensure the conntrack deletion tracker annotation is updated + ginkgo.By("Check if the k8s.ovn.org/external-gw-pod-ips got updated for the app namespace") + err = wait.PollImmediate(retryInterval, retryTimeout, func() (bool, error) { + ns := getNamespace(f, f.Namespace.Name) + return (ns.Annotations[externalGatewayPodIPsAnnotation] == ""), nil + }) + framework.ExpectNoError(err, "Check if the k8s.ovn.org/external-gw-pod-ips got updated, failed: %v", err) + + ginkgo.By("Check if conntrack entries for ECMP routes are removed for the deleted external gateway if traffic is UDP") + podConnEntriesWithMACLabelsSet = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) + totalPodConnEntries = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil) + if protocol 
== "udp" { + gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(0)) // we don't have any remaining gateways left + gomega.Expect(totalPodConnEntries).To(gomega.Equal(4)) // 6-2 + } else { + gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(2)) + gomega.Expect(totalPodConnEntries).To(gomega.Equal(6)) + } -// Validate pods can reach a network running in multiple container's loopback -// addresses via two external gateways running on eth0 of the container without -// any tunnel encap. This test defines two external gateways and validates ECMP -// functionality to the container loopbacks. To verify traffic reaches the -// gateways, tcpdump is running on the external gateways and will exit successfully -// once an ICMP packet is received from the annotated pod in the k8s cluster. -// Two additional gateways are added to verify the tcp / udp protocols. -// They run the netexec command, and the pod asks to return their hostname. -// The test checks that both hostnames are collected at least once. -var _ = ginkgo.Describe("e2e multiple external gateway validation", func() { - const ( - svcname string = "novxlan-externalgw-ecmp" - gwContainer1 string = "gw-test-container1" - gwContainer2 string = "gw-test-container2" - testTimeout string = "30" - ecmpRetry int = 20 - srcPodName = "e2e-exgw-src-pod" - externalTCPPort = 80 - externalUDPPort = 90 - ) + }, + ginkgotable.Entry("IPV4 udp", &addressesv4, "udp"), + ginkgotable.Entry("IPV4 tcp", &addressesv4, "tcp"), + ginkgotable.Entry("IPV6 udp", &addressesv6, "udp"), + ginkgotable.Entry("IPV6 tcp", &addressesv6, "tcp")) + }) - f := wrappedTestFramework(svcname) + // BFD Tests are dual of external gateway. The only difference is that they enable BFD on ovn and + // on the external containers, and after doing one round veryfing that the traffic reaches both containers, + // they delete one and verify that the traffic is always reaching the only alive container. 
+ var _ = ginkgo.Context("BFD", func() { + var _ = ginkgo.Describe("e2e non-vxlan external gateway through an annotated gateway pod", func() { + const ( + svcname string = "externalgw-pod-novxlan" + gwContainer1 string = "ex-gw-container1" + gwContainer2 string = "ex-gw-container2" + srcPingPodName string = "e2e-exgw-src-ping-pod" + gatewayPodName1 string = "e2e-gateway-pod1" + gatewayPodName2 string = "e2e-gateway-pod2" + externalTCPPort = 91 + externalUDPPort = 90 + ecmpRetry int = 20 + testTimeout string = "20" + defaultPolicyName = "default-route-policy" + ) + + var ( + sleepCommand = []string{"bash", "-c", "sleep 20000"} + addressesv4, addressesv6 gatewayTestIPs + clientSet kubernetes.Interface + servingNamespace string + ) + + var ( + gwContainers []string + ) + + f := wrappedTestFramework(svcname) + + ginkgo.BeforeEach(func() { + clientSet = f.ClientSet // so it can be used in AfterEach + // retrieve worker node names + nodes, err := e2enode.GetBoundedReadySchedulableNodes(f.ClientSet, 3) + framework.ExpectNoError(err) + if len(nodes.Items) < 3 { + framework.Failf( + "Test requires >= 3 Ready nodes, but there are only %v nodes", + len(nodes.Items)) + } - var gwContainers []string - var addressesv4, addressesv6 gatewayTestIPs + ns, err := f.CreateNamespace("exgw-bfd-serving", nil) + framework.ExpectNoError(err) + servingNamespace = ns.Name - ginkgo.BeforeEach(func() { - // retrieve worker node names - nodes, err := e2enode.GetBoundedReadySchedulableNodes(f.ClientSet, 3) - framework.ExpectNoError(err) - if len(nodes.Items) < 3 { - framework.Failf( - "Test requires >= 3 Ready nodes, but there are only %v nodes", - len(nodes.Items)) - } + setupBFD := setupBFDOnContainer(nodes.Items) + gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPingPodName, externalUDPPort, externalTCPPort, ecmpRetry, setupBFD) + setupAnnotatedGatewayPods(f, nodes, gatewayPodName1, gatewayPodName2, servingNamespace, sleepCommand, addressesv4, addressesv6, true) + }) - if externalContainerNetwork == "host" { - skipper.Skipf("Skipping as host network doesn't support multiple external gateways") - } + ginkgo.AfterEach(func() { + cleanExGWContainers(clientSet, []string{gwContainer1, gwContainer2}, addressesv4, addressesv6) + resetGatewayAnnotations(f) + }) - gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPodName, externalUDPPort, externalTCPPort, ecmpRetry) + ginkgotable.DescribeTable("Should validate ICMP connectivity to an external gateway's loopback address via a pod with external gateway annotations enabled", + func(addresses *gatewayTestIPs, icmpCommand string) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } - // remove the routing external annotation - annotateArgs := []string{ - "annotate", - "namespace", - f.Namespace.Name, - "k8s.ovn.org/routing-external-gws-", - } - ginkgo.By("Resetting the gw annotation") - framework.RunKubectlOrDie(f.Namespace.Name, annotateArgs...) 
- }) + ginkgo.By("Verifying connectivity to the pod from external gateways") + for _, gwContainer := range gwContainers { + _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) + framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + } + + // This is needed for bfd to sync up + time.Sleep(3 * time.Second) + + for _, gwContainer := range gwContainers { + framework.ExpectEqual(isBFDPaired(gwContainer, addresses.nodeIP), true, "Bfd not paired") + } + + tcpDumpSync := sync.WaitGroup{} + tcpDumpSync.Add(len(gwContainers)) + for _, gwContainer := range gwContainers { + go checkPingOnContainer(gwContainer, srcPingPodName, icmpCommand, &tcpDumpSync) + } + + // Verify the external gateway loopback address running on the external container is reachable and + // that traffic from the source ping pod is proxied through the pod in the serving namespace + ginkgo.By("Verifying connectivity via the gateway namespace to the remote addresses") + + pingSync := sync.WaitGroup{} + // spawn a goroutine to asynchronously (to speed up the test) + // to ping the gateway loopbacks on both containers via ECMP. + for _, address := range addresses.targetIPs { + pingSync.Add(1) + go func(target string) { + defer ginkgo.GinkgoRecover() + defer pingSync.Done() + _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c", testTimeout, target) + if err != nil { + framework.Logf("error generating a ping from the test pod %s: %v", srcPingPodName, err) + } + }(address) + } + + pingSync.Wait() + tcpDumpSync.Wait() + + if len(gwContainers) > 1 { + ginkgo.By("Deleting one container") + deleteClusterExternalContainer(gwContainers[1]) + time.Sleep(3 * time.Second) // bfd timeout + + tcpDumpSync = sync.WaitGroup{} + tcpDumpSync.Add(1) + go checkPingOnContainer(gwContainers[0], srcPingPodName, icmpCommand, &tcpDumpSync) + + // Verify the external gateway loopback address running on the external container is reachable and + // that traffic from the source ping pod is proxied through the pod in the serving namespace + ginkgo.By("Verifying connectivity via the gateway namespace to the remote addresses") + pingSync = sync.WaitGroup{} + + for _, t := range addresses.targetIPs { + pingSync.Add(1) + go func(target string) { + defer ginkgo.GinkgoRecover() + defer pingSync.Done() + _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c", testTimeout, target) + framework.ExpectNoError(err, "Failed to ping remote gateway %s from pod %s", target, srcPingPodName) + }(t) + } + pingSync.Wait() + tcpDumpSync.Wait() + } + }, + ginkgotable.Entry("ipv4", &addressesv4, "icmp"), + ginkgotable.Entry("ipv6", &addressesv6, "icmp6")) + + ginkgotable.DescribeTable("Should validate TCP/UDP connectivity to an external gateway's loopback address via a pod with external gateway annotations enabled", + func(protocol string, addresses *gatewayTestIPs, destPort int) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + + for _, gwContainer := range gwContainers { + _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) + framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + } + + for _, gwContainer := range gwContainers { + framework.ExpectEqual(isBFDPaired(gwContainer, 
addresses.nodeIP), true, "Bfd not paired") + } + + expectedHostNames := hostNamesForContainers(gwContainers) + framework.Logf("Expected hostnames are %v", expectedHostNames) + + returnedHostNames := make(map[string]struct{}) + target := addresses.targetIPs[0] + success := false + for i := 0; i < 20; i++ { + hostname := pokeHostnameViaNC(srcPingPodName, f.Namespace.Name, protocol, target, destPort) + if hostname != "" { + returnedHostNames[hostname] = struct{}{} + } + + if cmp.Equal(returnedHostNames, expectedHostNames) { + success = true + break + } + } + framework.Logf("Received hostnames for protocol %s are %v ", protocol, returnedHostNames) + + if !success { + framework.Failf("Failed to hit all the external gateways via for protocol %s, diff %s", protocol, cmp.Diff(expectedHostNames, returnedHostNames)) + } + + if len(gwContainers) > 1 { + ginkgo.By("Deleting one container") + deleteClusterExternalContainer(gwContainers[1]) + ginkgo.By("Waiting for BFD to sync") + time.Sleep(3 * time.Second) // bfd timeout + + // ECMP should direct all the traffic to the only container + expectedHostName := hostNameForContainer(gwContainers[0]) + + ginkgo.By("Checking hostname multiple times") + for i := 0; i < 20; i++ { + hostname := pokeHostnameViaNC(srcPingPodName, f.Namespace.Name, protocol, target, destPort) + framework.ExpectEqual(expectedHostName, hostname, "Hostname returned by nc not as expected") + } + } + }, + ginkgotable.Entry("UDP ipv4", "udp", &addressesv4, externalUDPPort), + ginkgotable.Entry("TCP ipv4", "tcp", &addressesv4, externalTCPPort), + ginkgotable.Entry("UDP ipv6", "udp", &addressesv6, externalUDPPort), + ginkgotable.Entry("TCP ipv6", "tcp", &addressesv6, externalTCPPort)) + }) + + // Validate pods can reach a network running in multiple container's loopback + // addresses via two external gateways running on eth0 of the container without + // any tunnel encap. This test defines two external gateways and validates ECMP + // functionality to the container loopbacks. To verify traffic reaches the + // gateways, tcpdump is running on the external gateways and will exit successfully + // once an ICMP packet is received from the annotated pod in the k8s cluster. + // Two additional gateways are added to verify the tcp / udp protocols. + // They run the netexec command, and the pod asks to return their hostname. + // The test checks that both hostnames are collected at least once. 
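The tcpdump side of the check described above is encapsulated in checkPingOnContainer, whose body is outside this diff. The sketch below only illustrates the shape such a watcher can take; the helper name, the timeout wrapper, and the tcpdump flags are assumptions here:

// Block until one matching ICMP packet from the source pod reaches the
// gateway container, then release the WaitGroup; "-c 1" makes tcpdump exit
// after the first match, and the outer timeout bounds the wait.
func watchForPingSketch(container, icmpFilter string, wg *sync.WaitGroup) {
	defer ginkgo.GinkgoRecover()
	defer wg.Done()
	_, err := runCommand(containerRuntime, "exec", container,
		"timeout", "60", "tcpdump", "-c", "1", "-i", "any", icmpFilter)
	framework.ExpectNoError(err, "no ICMP packet seen on %s", container)
}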
+ var _ = ginkgo.Describe("e2e multiple external gateway validation", func() { + const ( + svcname string = "novxlan-externalgw-ecmp" + gwContainer1 string = "gw-test-container1" + gwContainer2 string = "gw-test-container2" + testTimeout string = "30" + ecmpRetry int = 20 + srcPodName = "e2e-exgw-src-pod" + externalTCPPort = 80 + externalUDPPort = 90 + ) + + var ( + gwContainers []string + ) + + testContainer := fmt.Sprintf("%s-container", srcPodName) + testContainerFlag := fmt.Sprintf("--container=%s", testContainer) + + f := wrappedTestFramework(svcname) + + var addressesv4, addressesv6 gatewayTestIPs + + ginkgo.BeforeEach(func() { + nodes, err := e2enode.GetBoundedReadySchedulableNodes(f.ClientSet, 3) + framework.ExpectNoError(err) + if len(nodes.Items) < 3 { + framework.Failf( + "Test requires >= 3 Ready nodes, but there are only %v nodes", + len(nodes.Items)) + } + + if externalContainerNetwork == "host" { + skipper.Skipf("Skipping as host network doesn't support multiple external gateways") + } + + setupBFD := setupBFDOnContainer(nodes.Items) + gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPodName, externalUDPPort, externalTCPPort, ecmpRetry, setupBFD) + + }) + + ginkgo.AfterEach(func() { + // tear down the containers simulating the gateways + deleteClusterExternalContainer(gwContainer1) + deleteClusterExternalContainer(gwContainer2) + resetGatewayAnnotations(f) + }) + + ginkgotable.DescribeTable("Should validate ICMP connectivity to multiple external gateways for an ECMP scenario", func(addresses *gatewayTestIPs, icmpToDump string) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + + annotateNamespaceForGateway(f.Namespace.Name, true, addresses.gatewayIPs[:]...) + for _, gwContainer := range gwContainers { + _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) + framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + } + + // This is needed for bfd to sync up + time.Sleep(3 * time.Second) + + for _, gwContainer := range gwContainers { + framework.ExpectEqual(isBFDPaired(gwContainer, addresses.nodeIP), true, "Bfd not paired") + } + + // Verify the gateways and remote loopback addresses are reachable from the pod. + // Iterate checking connectivity to the loopbacks on the gateways until tcpdump see + // the traffic or 20 attempts fail. Odds of a false negative here is ~ (1/2)^20 + ginkgo.By("Verifying ecmp connectivity to the external gateways by iterating through the targets") + + // Check for egress traffic to both gateway loopback addresses using tcpdump, since + // /proc/net/dev counters only record the ingress interface traffic is received on. + // The test will waits until an ICMP packet is matched on the gateways or fail the + // test if a packet to the loopback is not received within the timer interval. + // If an ICMP packet is never detected, return the error via the specified chanel. + + tcpDumpSync := sync.WaitGroup{} + tcpDumpSync.Add(len(gwContainers)) + for _, gwContainer := range gwContainers { + go checkPingOnContainer(gwContainer, srcPodName, icmpToDump, &tcpDumpSync) + } + + // spawn a goroutine to asynchronously (to speed up the test) + // to ping the gateway loopbacks on both containers via ECMP. 
+ + pingSync := sync.WaitGroup{} + + // spawn a goroutine to asynchronously (to speed up the test) + // to ping the gateway loopbacks on both containers via ECMP. + for _, address := range addresses.targetIPs { + pingSync.Add(1) + go func(target string) { + defer ginkgo.GinkgoRecover() + defer pingSync.Done() + _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPodName, testContainerFlag, "--", "ping", "-c", testTimeout, target) + if err != nil { + framework.Logf("error generating a ping from the test pod %s: %v", srcPodName, err) + } + }(address) + } + + pingSync.Wait() + tcpDumpSync.Wait() + + ginkgo.By("Deleting one container") + deleteClusterExternalContainer(gwContainers[1]) + time.Sleep(3 * time.Second) // bfd timeout + + pingSync = sync.WaitGroup{} + tcpDumpSync = sync.WaitGroup{} + + tcpDumpSync.Add(1) + go checkPingOnContainer(gwContainers[0], srcPodName, icmpToDump, &tcpDumpSync) + + // spawn a goroutine to asynchronously (to speed up the test) + // to ping the gateway loopbacks on both containers via ECMP. + for _, address := range addresses.targetIPs { + pingSync.Add(1) + go func(target string) { + defer ginkgo.GinkgoRecover() + defer pingSync.Done() + _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPodName, testContainerFlag, "--", "ping", "-c", testTimeout, target) + if err != nil { + framework.Logf("error generating a ping from the test pod %s: %v", srcPodName, err) + } + }(address) + } + + pingSync.Wait() + tcpDumpSync.Wait() + + }, ginkgotable.Entry("IPV4", &addressesv4, "icmp"), + ginkgotable.Entry("IPV6", &addressesv6, "icmp6")) + + // This test runs a listener on the external container, returning the host name both on tcp and udp. + // The src pod tries to hit the remote address until both the containers are hit. + ginkgotable.DescribeTable("Should validate TCP/UDP connectivity to multiple external gateways for a UDP / TCP scenario", func(addresses *gatewayTestIPs, protocol string, destPort int) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + + annotateNamespaceForGateway(f.Namespace.Name, true, addresses.gatewayIPs[:]...) 
+ + for _, gwContainer := range gwContainers { + _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) + framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + } + + // This is needed for bfd to sync up + time.Sleep(3 * time.Second) + + for _, gwContainer := range gwContainers { + framework.ExpectEqual(isBFDPaired(gwContainer, addresses.nodeIP), true, "Bfd not paired") + } + + expectedHostNames := hostNamesForContainers(gwContainers) + framework.Logf("Expected hostnames are %v", expectedHostNames) + + returnedHostNames := make(map[string]struct{}) + success := false + + // Picking only the first address, the one the udp listener is set for + target := addresses.targetIPs[0] + for i := 0; i < 20; i++ { + hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, target, destPort) + if hostname != "" { + returnedHostNames[hostname] = struct{}{} + } + if cmp.Equal(returnedHostNames, expectedHostNames) { + success = true + break + } + } + + framework.Logf("Received hostnames for protocol %s are %v ", protocol, returnedHostNames) + + if !success { + framework.Failf("Failed to hit all the external gateways via for protocol %s, diff %s", protocol, cmp.Diff(expectedHostNames, returnedHostNames)) + } + + ginkgo.By("Deleting one container") + deleteClusterExternalContainer(gwContainers[1]) + ginkgo.By("Waiting for BFD to sync") + time.Sleep(3 * time.Second) // bfd timeout + + // ECMP should direct all the traffic to the only container + expectedHostName := hostNameForContainer(gwContainers[0]) + + ginkgo.By("Checking hostname multiple times") + for i := 0; i < 20; i++ { + hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, target, destPort) + framework.ExpectEqual(expectedHostName, hostname, "Hostname returned by nc not as expected") + } + }, ginkgotable.Entry("IPV4 udp", &addressesv4, "udp", externalUDPPort), + ginkgotable.Entry("IPV4 tcp", &addressesv4, "tcp", externalTCPPort), + ginkgotable.Entry("IPV6 udp", &addressesv6, "udp", externalUDPPort), + ginkgotable.Entry("IPV6 tcp", &addressesv6, "tcp", externalTCPPort)) + }) + }) - ginkgo.AfterEach(func() { - // tear down the containers simulating the gateways - deleteClusterExternalContainer(gwContainer1) - deleteClusterExternalContainer(gwContainer2) }) - ginkgotable.DescribeTable("Should validate ICMP connectivity to multiple external gateways for an ECMP scenario", func(addresses *gatewayTestIPs, icmpToDump string) { - if addresses.srcPodIP == "" || addresses.nodeIP == "" { - skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) - } + var _ = ginkgo.Context("With Admin Policy Based External Route CRs", func() { + + // Validate pods can reach a network running in a container's looback address via + // an external gateway running on eth0 of the container without any tunnel encap. + // The traffic will get proxied through an annotated pod in the serving namespace. 
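Unlike the annotation variants, this context drives routing through AdminPolicyBasedExternalRoute CRs, via helpers such as createAPBExternalRouteCRWithStaticHop and createAPBExternalRouteCRWithDynamicHop whose bodies are outside this hunk. A hedged sketch of what a static-hop policy plausibly looks like; the spec field names follow the CRD this series introduces and should be treated as assumptions, as should the use of the framework's RunKubectlInput to pipe the manifest:

// A plausible static-hop policy selecting the target namespace.
const apbRouteYAML = `apiVersion: k8s.ovn.org/v1
kind: AdminPolicyBasedExternalRoute
metadata:
  name: default-route-policy
spec:
  from:
    namespaceSelector:
      matchLabels:
        kubernetes.io/metadata.name: target-ns
  nextHops:
    static:
    - ip: "172.18.0.10"
      bfdEnabled: false`

// Apply it the same way the suite pipes any manifest through kubectl.
framework.RunKubectlInput("", apbRouteYAML, "apply", "-f", "-")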
+ var _ = ginkgo.Describe("e2e non-vxlan external gateway through a gateway pod", func() { + const ( + svcname string = "externalgw-pod-novxlan" + gwContainer1 string = "ex-gw-container1" + gwContainer2 string = "ex-gw-container2" + srcPingPodName string = "e2e-exgw-src-ping-pod" + gatewayPodName1 string = "e2e-gateway-pod1" + gatewayPodName2 string = "e2e-gateway-pod2" + externalTCPPort = 91 + externalUDPPort = 90 + ecmpRetry int = 20 + testTimeout string = "20" + ) + + var ( + sleepCommand = []string{"bash", "-c", "sleep 20000"} + addressesv4, addressesv6 gatewayTestIPs + clientSet kubernetes.Interface + servingNamespace string + ) + + var ( + gwContainers []string + ) + + f := wrappedTestFramework(svcname) + + ginkgo.BeforeEach(func() { + clientSet = f.ClientSet // so it can be used in AfterEach + // retrieve worker node names + nodes, err := e2enode.GetBoundedReadySchedulableNodes(f.ClientSet, 3) + framework.ExpectNoError(err) + if len(nodes.Items) < 3 { + framework.Failf( + "Test requires >= 3 Ready nodes, but there are only %v nodes", + len(nodes.Items)) + } - annotateNamespaceForGateway(f.Namespace.Name, false, addresses.gatewayIPs[:]...) + ns, err := f.CreateNamespace("exgw-serving", nil) + framework.ExpectNoError(err) + servingNamespace = ns.Name - ginkgo.By("Verifying connectivity to the pod from external gateways") - for _, gwContainer := range gwContainers { - _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) - framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) - } + gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPingPodName, externalUDPPort, externalTCPPort, ecmpRetry) + setupPolicyBasedGatewayPods(f, nodes, gatewayPodName1, gatewayPodName2, servingNamespace, sleepCommand, addressesv4, addressesv6) + }) - ginkgo.By("Verifying connectivity to the pod from external gateways with large packets > pod MTU") - for _, gwContainer := range gwContainers { - _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-s", "1420", "-c", testTimeout, addresses.srcPodIP) - framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) - } + ginkgo.AfterEach(func() { + deleteAPBExternalRouteCR(defaultPolicyName) + cleanExGWContainers(clientSet, []string{gwContainer1, gwContainer2}, addressesv4, addressesv6) + }) - // Verify the gateways and remote loopback addresses are reachable from the pod. - // Iterate checking connectivity to the loopbacks on the gateways until tcpdump see - // the traffic or 20 attempts fail. Odds of a false negative here is ~ (1/2)^20 - ginkgo.By("Verifying ecmp connectivity to the external gateways by iterating through the targets") + ginkgotable.DescribeTable("Should validate ICMP connectivity to an external gateway's loopback address via a gateway pod", + func(addresses *gatewayTestIPs, icmpCommand string) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + createAPBExternalRouteCRWithDynamicHop(defaultPolicyName, f.Namespace.Name, servingNamespace, false, addressesv4.gatewayIPs) - // Check for egress traffic to both gateway loopback addresses using tcpdump, since - // /proc/net/dev counters only record the ingress interface traffic is received on. 
- // The test will waits until an ICMP packet is matched on the gateways or fail the - // test if a packet to the loopback is not received within the timer interval. - // If an ICMP packet is never detected, return the error via the specified chanel. + ginkgo.By(fmt.Sprintf("Verifying connectivity to the pod [%s] from external gateways", addresses.srcPodIP)) + for _, gwContainer := range gwContainers { + _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) + framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + } - tcpDumpSync := sync.WaitGroup{} - tcpDumpSync.Add(len(gwContainers)) - for _, gwContainer := range gwContainers { - go checkPingOnContainer(gwContainer, srcPodName, icmpToDump, &tcpDumpSync) - } + tcpDumpSync := sync.WaitGroup{} + tcpDumpSync.Add(len(gwContainers)) + + for _, gwContainer := range gwContainers { + go checkPingOnContainer(gwContainer, srcPingPodName, icmpCommand, &tcpDumpSync) + } - pingSync := sync.WaitGroup{} - - // spawn a goroutine to asynchronously (to speed up the test) - // to ping the gateway loopbacks on both containers via ECMP. - for _, address := range addresses.targetIPs { - pingSync.Add(1) - go func(target string) { - defer ginkgo.GinkgoRecover() - defer pingSync.Done() - _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPodName, "--", "ping", "-c", testTimeout, target) - if err != nil { - framework.Logf("error generating a ping from the test pod %s: %v", srcPodName, err) + pingSync := sync.WaitGroup{} + // Verify the external gateway loopback address running on the external container is reachable and + // that traffic from the source ping pod is proxied through the pod in the serving namespace + ginkgo.By("Verifying connectivity via the gateway namespace to the remote addresses") + for _, t := range addresses.targetIPs { + pingSync.Add(1) + go func(target string) { + defer ginkgo.GinkgoRecover() + defer pingSync.Done() + _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c", testTimeout, target) + framework.ExpectNoError(err, "Failed to ping remote gateway %s from pod %s", target, srcPingPodName) + }(t) + } + pingSync.Wait() + tcpDumpSync.Wait() + }, + ginkgotable.Entry("ipv4", &addressesv4, "icmp"), + ginkgotable.Entry("ipv6", &addressesv6, "icmp6")) + + ginkgotable.DescribeTable("Should validate TCP/UDP connectivity to an external gateway's loopback address via a gateway pod", + func(protocol string, addresses *gatewayTestIPs, destPort, destPortOnPod int) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + createAPBExternalRouteCRWithDynamicHop(defaultPolicyName, f.Namespace.Name, servingNamespace, false, addressesv4.gatewayIPs) + + for _, container := range gwContainers { + reachPodFromContainer(addresses.srcPodIP, strconv.Itoa(destPortOnPod), srcPingPodName, container, protocol) + } + + expectedHostNames := make(map[string]struct{}) + for _, c := range gwContainers { + res, err := runCommand(containerRuntime, "exec", c, "hostname") + framework.ExpectNoError(err, "failed to run hostname in %s", c) + hostname := strings.TrimSuffix(res, "\n") + framework.Logf("Hostname for %s is %s", c, hostname) + expectedHostNames[hostname] = struct{}{} + } + framework.Logf("Expected hostnames are %v", expectedHostNames) + + ginkgo.By("Checking that external ips are reachable with 
both gateways") + returnedHostNames := make(map[string]struct{}) + target := addresses.targetIPs[0] + success := false + for i := 0; i < 20; i++ { + args := []string{"exec", srcPingPodName, "--"} + if protocol == "tcp" { + args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 %s %d", target, destPort)) + } else { + args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 -u %s %d", target, destPort)) + } + res, err := framework.RunKubectl(f.Namespace.Name, args...) + framework.ExpectNoError(err, "failed to reach %s (%s)", target, protocol) + hostname := strings.TrimSuffix(res, "\n") + if hostname != "" { + returnedHostNames[hostname] = struct{}{} + } + + if cmp.Equal(returnedHostNames, expectedHostNames) { + success = true + break + } + } + framework.Logf("Received hostnames for protocol %s are %v ", protocol, returnedHostNames) + + if !success { + framework.Failf("Failed to hit all the external gateways via for protocol %s, diff %s", protocol, cmp.Diff(expectedHostNames, returnedHostNames)) + } + + }, + ginkgotable.Entry("UDP ipv4", "udp", &addressesv4, externalUDPPort, srcUDPPort), + ginkgotable.Entry("TCP ipv4", "tcp", &addressesv4, externalTCPPort, srcHTTPPort), + ginkgotable.Entry("UDP ipv6", "udp", &addressesv6, externalUDPPort, srcUDPPort), + ginkgotable.Entry("TCP ipv6", "tcp", &addressesv6, externalTCPPort, srcHTTPPort)) + }) + + // Validate pods can reach a network running in multiple container's loopback + // addresses via two external gateways running on eth0 of the container without + // any tunnel encap. This test defines two external gateways and validates ECMP + // functionality to the container loopbacks. To verify traffic reaches the + // gateways, tcpdump is running on the external gateways and will exit successfully + // once an ICMP packet is received from the annotated pod in the k8s cluster. + // Two additional gateways are added to verify the tcp / udp protocols. + // They run the netexec command, and the pod asks to return their hostname. + // The test checks that both hostnames are collected at least once. 
+ var _ = ginkgo.Describe("e2e multiple external gateway validation", func() { + const ( + svcname string = "novxlan-externalgw-ecmp" + gwContainer1 string = "gw-test-container1" + gwContainer2 string = "gw-test-container2" + testTimeout string = "30" + ecmpRetry int = 20 + srcPodName = "e2e-exgw-src-pod" + externalTCPPort = 80 + externalUDPPort = 90 + ) + + f := wrappedTestFramework(svcname) + + var gwContainers []string + var addressesv4, addressesv6 gatewayTestIPs + + ginkgo.BeforeEach(func() { + // retrieve worker node names + nodes, err := e2enode.GetBoundedReadySchedulableNodes(f.ClientSet, 3) + framework.ExpectNoError(err) + if len(nodes.Items) < 3 { + framework.Failf( + "Test requires >= 3 Ready nodes, but there are only %v nodes", + len(nodes.Items)) } - }(address) - } - pingSync.Wait() - tcpDumpSync.Wait() - }, ginkgotable.Entry("IPV4", &addressesv4, "icmp"), - ginkgotable.Entry("IPV6", &addressesv6, "icmp6")) + if externalContainerNetwork == "host" { + skipper.Skipf("Skipping as host network doesn't support multiple external gateways") + } + // ensure there are no namespaces with the gateway annotation + gomega.Eventually(func() int { + nsList, err := f.ClientSet.CoreV1().Namespaces().List(context.Background(), metav1.ListOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + count := 0 + for _, ns := range nsList.Items { + _, f1 := ns.Annotations["k8s.ovn.org/routing-external-gws"] + _, f2 := ns.Annotations["k8s.ovn.org/external-gw-pod-ips"] + if f1 || f2 { + count++ + } + } + return count + }, 20, 1).Should(gomega.Equal(0)) + gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPodName, externalUDPPort, externalTCPPort, ecmpRetry) + }) + + ginkgo.AfterEach(func() { + // tear down the containers simulating the gateways + deleteClusterExternalContainer(gwContainer1) + deleteClusterExternalContainer(gwContainer2) + deleteAPBExternalRouteCR(defaultPolicyName) + }) + + ginkgotable.DescribeTable("Should validate ICMP connectivity to multiple external gateways for an ECMP scenario", func(addresses *gatewayTestIPs, icmpToDump string) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + createAPBExternalRouteCRWithStaticHop(defaultPolicyName, f.Namespace.Name, false, addresses.gatewayIPs...) - // This test runs a listener on the external container, returning the host name both on tcp and udp. - // The src pod tries to hit the remote address until both the containers are hit. - ginkgotable.DescribeTable("Should validate TCP/UDP connectivity to multiple external gateways for a UDP / TCP scenario", func(addresses *gatewayTestIPs, protocol string, destPort, destPortOnPod int) { - if addresses.srcPodIP == "" || addresses.nodeIP == "" { - skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) - } + ginkgo.By("Verifying connectivity to the pod from external gateways") + for _, gwContainer := range gwContainers { + _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) + framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + } - annotateNamespaceForGateway(f.Namespace.Name, false, addresses.gatewayIPs[:]...) 
+ ginkgo.By("Verifying connectivity to the pod from external gateways with large packets > pod MTU") + for _, gwContainer := range gwContainers { + _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-s", "1420", "-c", testTimeout, addresses.srcPodIP) + framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + } - for _, container := range gwContainers { - reachPodFromContainer(addresses.srcPodIP, strconv.Itoa(destPortOnPod), srcPodName, container, protocol) - } + // Verify the gateways and remote loopback addresses are reachable from the pod. + // Iterate checking connectivity to the loopbacks on the gateways until tcpdump see + // the traffic or 20 attempts fail. Odds of a false negative here is ~ (1/2)^20 + ginkgo.By("Verifying ecmp connectivity to the external gateways by iterating through the targets") - expectedHostNames := hostNamesForContainers(gwContainers) - framework.Logf("Expected hostnames are %v", expectedHostNames) + // Check for egress traffic to both gateway loopback addresses using tcpdump, since + // /proc/net/dev counters only record the ingress interface traffic is received on. + // The test will waits until an ICMP packet is matched on the gateways or fail the + // test if a packet to the loopback is not received within the timer interval. + // If an ICMP packet is never detected, return the error via the specified chanel. - returnedHostNames := make(map[string]struct{}) - success := false + tcpDumpSync := sync.WaitGroup{} + tcpDumpSync.Add(len(gwContainers)) + for _, gwContainer := range gwContainers { + go checkPingOnContainer(gwContainer, srcPodName, icmpToDump, &tcpDumpSync) + } - // Picking only the first address, the one the udp listener is set for - target := addresses.targetIPs[0] - for i := 0; i < 20; i++ { - hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, target, destPort) - if hostname != "" { - returnedHostNames[hostname] = struct{}{} - } - if cmp.Equal(returnedHostNames, expectedHostNames) { - success = true - break - } - } + pingSync := sync.WaitGroup{} - framework.Logf("Received hostnames for protocol %s are %v ", protocol, returnedHostNames) + // spawn a goroutine to asynchronously (to speed up the test) + // to ping the gateway loopbacks on both containers via ECMP. + for _, address := range addresses.targetIPs { + pingSync.Add(1) + go func(target string) { + defer ginkgo.GinkgoRecover() + defer pingSync.Done() + _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPodName, "--", "ping", "-c", testTimeout, target) + if err != nil { + framework.Logf("error generating a ping from the test pod %s: %v", srcPodName, err) + } + }(address) + } + pingSync.Wait() + tcpDumpSync.Wait() - if !success { - framework.Failf("Failed to hit all the external gateways via for protocol %s, diff %s", protocol, cmp.Diff(expectedHostNames, returnedHostNames)) - } + }, ginkgotable.Entry("IPV4", &addressesv4, "icmp"), + ginkgotable.Entry("IPV6", &addressesv6, "icmp6")) - }, ginkgotable.Entry("IPV4 udp", &addressesv4, "udp", externalUDPPort, srcUDPPort), - ginkgotable.Entry("IPV4 tcp", &addressesv4, "tcp", externalTCPPort, srcHTTPPort), - ginkgotable.Entry("IPV6 udp", &addressesv6, "udp", externalUDPPort, srcUDPPort), - ginkgotable.Entry("IPV6 tcp", &addressesv6, "tcp", externalTCPPort, srcHTTPPort)) -}) + // This test runs a listener on the external container, returning the host name both on tcp and udp. + // The src pod tries to hit the remote address until both the containers are hit. 
+ ginkgotable.DescribeTable("Should validate TCP/UDP connectivity to multiple external gateways for a UDP / TCP scenario", func(addresses *gatewayTestIPs, protocol string, destPort, destPortOnPod int) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + createAPBExternalRouteCRWithStaticHop(defaultPolicyName, f.Namespace.Name, false, addresses.gatewayIPs...) -var _ = ginkgo.Describe("e2e multiple external gateway stale conntrack entry deletion validation", func() { - const ( - svcname string = "novxlan-externalgw-ecmp" - gwContainer1 string = "gw-test-container1" - gwContainer2 string = "gw-test-container2" - srcPodName string = "e2e-exgw-src-pod" - gatewayPodName1 string = "e2e-gateway-pod1" - gatewayPodName2 string = "e2e-gateway-pod2" - ) + for _, container := range gwContainers { + reachPodFromContainer(addresses.srcPodIP, strconv.Itoa(destPortOnPod), srcPodName, container, protocol) + } - var ( - servingNamespace string - ) + expectedHostNames := hostNamesForContainers(gwContainers) + framework.Logf("Expected hostnames are %v", expectedHostNames) - f := wrappedTestFramework(svcname) + returnedHostNames := make(map[string]struct{}) + success := false - var ( - addressesv4, addressesv6 gatewayTestIPs - sleepCommand []string - nodes *v1.NodeList - err error - clientSet kubernetes.Interface - ) + // Picking only the first address, the one the udp listener is set for + target := addresses.targetIPs[0] + for i := 0; i < 20; i++ { + hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, target, destPort) + if hostname != "" { + returnedHostNames[hostname] = struct{}{} + } + if cmp.Equal(returnedHostNames, expectedHostNames) { + success = true + break + } + } - ginkgo.BeforeEach(func() { - clientSet = f.ClientSet // so it can be used in AfterEach - // retrieve worker node names - nodes, err = e2enode.GetBoundedReadySchedulableNodes(clientSet, 3) - framework.ExpectNoError(err) - if len(nodes.Items) < 3 { - framework.Failf( - "Test requires >= 3 Ready nodes, but there are only %v nodes", - len(nodes.Items)) - } + framework.Logf("Received hostnames for protocol %s are %v ", protocol, returnedHostNames) - if externalContainerNetwork == "host" { - skipper.Skipf("Skipping as host network doesn't support multiple external gateways") - } + if !success { + framework.Failf("Failed to hit all the external gateways via for protocol %s, diff %s", protocol, cmp.Diff(expectedHostNames, returnedHostNames)) + } - ns, err := f.CreateNamespace("exgw-conntrack-serving", nil) - framework.ExpectNoError(err) - servingNamespace = ns.Name + }, ginkgotable.Entry("IPV4 udp", &addressesv4, "udp", externalUDPPort, srcUDPPort), + ginkgotable.Entry("IPV4 tcp", &addressesv4, "tcp", externalTCPPort, srcHTTPPort), + ginkgotable.Entry("IPV6 udp", &addressesv6, "udp", externalUDPPort, srcUDPPort), + ginkgotable.Entry("IPV6 tcp", &addressesv6, "tcp", externalTCPPort, srcHTTPPort)) + }) - addressesv4, addressesv6 = setupGatewayContainersForConntrackTest(f, nodes, gwContainer1, gwContainer2, srcPodName) - sleepCommand = []string{"bash", "-c", "sleep 20000"} - _, err = createGenericPod(f, gatewayPodName1, nodes.Items[0].Name, servingNamespace, sleepCommand) - framework.ExpectNoError(err, "Create and annotate the external gw pods to manage the src app pod namespace, failed: %v", err) - _, err = createGenericPod(f, gatewayPodName2, nodes.Items[1].Name, servingNamespace, sleepCommand) - 
framework.ExpectNoError(err, "Create and annotate the external gw pods to manage the src app pod namespace, failed: %v", err) + var _ = ginkgo.Describe("e2e multiple external gateway stale conntrack entry deletion validation", func() { + const ( + svcname string = "novxlan-externalgw-ecmp" + gwContainer1 string = "gw-test-container1" + gwContainer2 string = "gw-test-container2" + srcPodName string = "e2e-exgw-src-pod" + gatewayPodName1 string = "e2e-gateway-pod1" + gatewayPodName2 string = "e2e-gateway-pod2" + ) + + var ( + servingNamespace string + ) + + f := wrappedTestFramework(svcname) + + var ( + addressesv4, addressesv6 gatewayTestIPs + sleepCommand []string + nodes *v1.NodeList + err error + clientSet kubernetes.Interface + ) + + ginkgo.BeforeEach(func() { + clientSet = f.ClientSet // so it can be used in AfterEach + // retrieve worker node names + nodes, err = e2enode.GetBoundedReadySchedulableNodes(clientSet, 3) + framework.ExpectNoError(err) + if len(nodes.Items) < 3 { + framework.Failf( + "Test requires >= 3 Ready nodes, but there are only %v nodes", + len(nodes.Items)) + } - // remove the routing external annotation - annotateArgs := []string{ - "annotate", - "namespace", - f.Namespace.Name, - "k8s.ovn.org/routing-external-gws-", - } - ginkgo.By("Resetting the gw annotation") - framework.RunKubectlOrDie(f.Namespace.Name, annotateArgs...) - }) + if externalContainerNetwork == "host" { + skipper.Skipf("Skipping as host network doesn't support multiple external gateways") + } - ginkgo.AfterEach(func() { - // tear down the containers and pods simulating the gateways - ginkgo.By("Deleting the gateway containers") - deleteClusterExternalContainer(gwContainer1) - deleteClusterExternalContainer(gwContainer2) - }) + ns, err := f.CreateNamespace("exgw-conntrack-serving", nil) + framework.ExpectNoError(err) + servingNamespace = ns.Name + + addressesv4, addressesv6 = setupGatewayContainersForConntrackTest(f, nodes, gwContainer1, gwContainer2, srcPodName) + sleepCommand = []string{"bash", "-c", "sleep 20000"} + _, err = createGenericPodWithLabel(f, gatewayPodName1, nodes.Items[0].Name, servingNamespace, sleepCommand, map[string]string{"name": gatewayPodName1, "gatewayPod": "true"}) + framework.ExpectNoError(err, "Create the external gw pods to manage the src app pod namespace, failed: %v", err) + _, err = createGenericPodWithLabel(f, gatewayPodName2, nodes.Items[1].Name, servingNamespace, sleepCommand, map[string]string{"name": gatewayPodName2, "gatewayPod": "true"}) + framework.ExpectNoError(err, "Create the external gw pods to manage the src app pod namespace, failed: %v", err) + }) + + ginkgo.AfterEach(func() { + deleteClusterExternalContainer(gwContainer1) + deleteClusterExternalContainer(gwContainer2) + deleteAPBExternalRouteCR(defaultPolicyName) + }) + + ginkgotable.DescribeTable("Static Hop: Should validate conntrack entry deletion for TCP/UDP traffic via multiple external gateways a.k.a ECMP routes", func(addresses *gatewayTestIPs, protocol string) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + ginkgo.By("Create a static hop in an Admin Policy Based External Route CR targeting the app namespace to get managed by external gateways") + createAPBExternalRouteCRWithStaticHop(defaultPolicyName, f.Namespace.Name, false, addresses.gatewayIPs...) 
+ setupIperf3Client := func(container, address string, port int) { + // note iperf3 even when using udp also spawns tcp connection first; so we indirectly also have the tcp connection when using "-u" flag + cmd := []string{containerRuntime, "exec", container, "iperf3", "-u", "-c", address, "-p", fmt.Sprintf("%d", port), "-b", "1M", "-i", "1", "-t", "3", "&"} + _, err := runCommand(cmd...) + klog.Infof("iperf3 command %s", strings.Join(cmd, " ")) + framework.ExpectNoError(err, "failed to setup iperf3 client for %s", container) + } + macAddressGW := make([]string, 2) + for i, containerName := range []string{gwContainer1, gwContainer2} { + ginkgo.By("Start iperf3 client from external container to connect to iperf3 server running at the src pod") + setupIperf3Client(containerName, addresses.srcPodIP, 5201+i) + macAddressExtGW, err := net.ParseMAC(getMACAddressesForNetwork(containerName, externalContainerNetwork)) + framework.ExpectNoError(err, "failed to parse MAC address for %s", containerName) + // Trim leading 0s because conntrack dumped labels are just integers + // in hex without leading 0s. + macAddressGW[i] = strings.TrimLeft(strings.Replace(macAddressExtGW.String(), ":", "", -1), "0") + } + ginkgo.By("Check if conntrack entries for ECMP routes are created for the 2 external gateways") + nodeName := getPod(f, srcPodName).Spec.NodeName + podConnEntriesWithMACLabelsSet := 2 + totalPodConnEntries := 6 + gomega.Eventually(func() int { + return pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) + }, time.Minute, 5).Should(gomega.Equal(podConnEntriesWithMACLabelsSet)) + gomega.Expect(pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil)).To(gomega.Equal(totalPodConnEntries)) + + ginkgo.By("Check if conntrack entries for ECMP routes are removed for the deleted external gateway if traffic is UDP") + updateAPBExternalRouteCRWithStaticHop(defaultPolicyName, f.Namespace.Name, false, addresses.gatewayIPs[0]) + if protocol == "udp" { + podConnEntriesWithMACLabelsSet = 1 // we still have the conntrack entry for the remaining gateway + totalPodConnEntries = 5 // 6-1 + } - ginkgotable.DescribeTable("Namespace annotation: Should validate conntrack entry deletion for TCP/UDP traffic via multiple external gateways a.k.a ECMP routes", func(addresses *gatewayTestIPs, protocol string) { - if addresses.srcPodIP == "" || addresses.nodeIP == "" { - skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) - } - ginkgo.By("Annotate the app namespace to get managed by external gateways") - annotateNamespaceForGateway(f.Namespace.Name, false, addresses.gatewayIPs[0], addresses.gatewayIPs[1]) - - setupIperf3Client := func(container, address string, port int) { - // note iperf3 even when using udp also spawns tcp connection first; so we indirectly also have the tcp connection when using "-u" flag - cmd := []string{containerRuntime, "exec", container, "iperf3", "-u", "-c", address, "-p", fmt.Sprintf("%d", port), "-b", "1M", "-i", "1", "-t", "3", "&"} - _, err := runCommand(cmd...) 
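// Illustrative sketch of the MAC-to-conntrack-label normalization used above.
// macToConntrackLabel and countEntriesWithLabel are made-up names, but the string
// handling mirrors the strings.TrimLeft/strings.Replace calls in the test: conntrack
// dumps print labels as bare hex integers, so a gateway MAC such as
// "0a:58:64:40:01:03" has to be matched as "a5864400103" (no separators, no leading
// zeros).
func macToConntrackLabel(mac string) (string, error) {
	hw, err := net.ParseMAC(mac)
	if err != nil {
		return "", err
	}
	// drop the ":" separators, then strip leading zeros to match the
	// integer-style formatting of dumped conntrack labels
	return strings.TrimLeft(strings.Replace(hw.String(), ":", "", -1), "0"), nil
}

// Counting the entries that carry a given gateway's label can then be a plain
// substring match over whatever conntrack dump the suite collects (e.g. what
// pokeConntrackEntries parses):
func countEntriesWithLabel(dumpLines []string, label string) int {
	count := 0
	for _, line := range dumpLines {
		if strings.Contains(line, label) {
			count++
		}
	}
	return count
}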
- framework.ExpectNoError(err, "failed to setup iperf3 client for %s", container) - } - macAddressGW := make([]string, 2) - for i, containerName := range []string{gwContainer1, gwContainer2} { - ginkgo.By("Start iperf3 client from external container to connect to iperf3 server running at the src pod") - setupIperf3Client(containerName, addresses.srcPodIP, 5201+i) - macAddressExtGW, err := net.ParseMAC(getMACAddressesForNetwork(containerName, externalContainerNetwork)) - framework.ExpectNoError(err, "failed to parse MAC address for %s", containerName) - // Trim leading 0s because conntrack dumped labels are just integers - // in hex without leading 0s. - macAddressGW[i] = strings.TrimLeft(strings.Replace(macAddressExtGW.String(), ":", "", -1), "0") - } + gomega.Eventually(func() int { + n := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) + klog.Infof("Number of entries with macAddressGW %s:%d", macAddressGW, n) + return n + }, 10).Should(gomega.Equal(podConnEntriesWithMACLabelsSet)) - ginkgo.By("Check if conntrack entries for ECMP routes are created for the 2 external gateways") - nodeName := getPod(f, srcPodName).Spec.NodeName - podConnEntriesWithMACLabelsSet := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) - gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(2)) - totalPodConnEntries := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil) - gomega.Expect(totalPodConnEntries).To(gomega.Equal(6)) // total conntrack entries for this pod/protocol - - ginkgo.By("Remove second external gateway IP from the app namespace annotation") - annotateNamespaceForGateway(f.Namespace.Name, false, addresses.gatewayIPs[0]) - - ginkgo.By("Check if conntrack entries for ECMP routes are removed for the deleted external gateway if traffic is UDP") - podConnEntriesWithMACLabelsSet = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) - totalPodConnEntries = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil) - if protocol == "udp" { - gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(1)) // we still have the conntrack entry for the remaining gateway - gomega.Expect(totalPodConnEntries).To(gomega.Equal(5)) // 6-1 - } else { - gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(2)) - gomega.Expect(totalPodConnEntries).To(gomega.Equal(6)) - } + gomega.Expect(pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil)).To(gomega.Equal(totalPodConnEntries)) - ginkgo.By("Remove first external gateway IP from the app namespace annotation") - annotateNamespaceForGateway(f.Namespace.Name, false, "") - - ginkgo.By("Check if conntrack entries for ECMP routes are removed for the deleted external gateway if traffic is UDP") - podConnEntriesWithMACLabelsSet = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) - totalPodConnEntries = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil) - if protocol == "udp" { - gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(0)) // we don't have any remaining gateways left - gomega.Expect(totalPodConnEntries).To(gomega.Equal(4)) // 6-2 - } else { - gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(2)) - gomega.Expect(totalPodConnEntries).To(gomega.Equal(6)) - } + ginkgo.By("Remove the remaining static hop from the CR") + deleteAPBExternalRouteCR(defaultPolicyName) + ginkgo.By("Check if conntrack entries for ECMP routes are removed for the deleted external gateway if traffic is UDP") + + 
podConnEntriesWithMACLabelsSet = 2 + totalPodConnEntries = 6 + if protocol == "udp" { + podConnEntriesWithMACLabelsSet = 0 // we don't have any remaining gateways left + totalPodConnEntries = 4 // 6-2 + } - }, - ginkgotable.Entry("IPV4 udp", &addressesv4, "udp"), - ginkgotable.Entry("IPV4 tcp", &addressesv4, "tcp"), - ginkgotable.Entry("IPV6 udp", &addressesv6, "udp"), - ginkgotable.Entry("IPV6 tcp", &addressesv6, "tcp")) + gomega.Eventually(func() int { + n := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) + klog.Infof("Number of entries with macAddressGW %s:%d", macAddressGW, n) + return n + }, time.Minute, 5).Should(gomega.Equal(podConnEntriesWithMACLabelsSet)) - ginkgotable.DescribeTable("ExternalGWPod annotation: Should validate conntrack entry deletion for TCP/UDP traffic via multiple external gateways a.k.a ECMP routes", func(addresses *gatewayTestIPs, protocol string) { - if addresses.srcPodIP == "" || addresses.nodeIP == "" { - skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) - } - ginkgo.By("Annotate the external gw pods to manage the src app pod namespace") - for i, gwPod := range []string{gatewayPodName1, gatewayPodName2} { - networkIPs := fmt.Sprintf("\"%s\"", addresses.gatewayIPs[i]) - if addresses.srcPodIP != "" && addresses.nodeIP != "" { - networkIPs = fmt.Sprintf("\"%s\", \"%s\"", addresses.gatewayIPs[i], addresses.gatewayIPs[i]) - } - annotatePodForGateway(gwPod, servingNamespace, f.Namespace.Name, networkIPs, false) - } + gomega.Expect(pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil)).To(gomega.Equal(totalPodConnEntries)) + }, + ginkgotable.Entry("IPV4 udp", &addressesv4, "udp"), + ginkgotable.Entry("IPV4 tcp", &addressesv4, "tcp"), + ginkgotable.Entry("IPV6 udp", &addressesv6, "udp"), + ginkgotable.Entry("IPV6 tcp", &addressesv6, "tcp")) + + ginkgotable.DescribeTable("Dynamic Hop: Should validate conntrack entry deletion for TCP/UDP traffic via multiple external gateways a.k.a ECMP routes", func(addresses *gatewayTestIPs, protocol string) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + + for i, gwPod := range []string{gatewayPodName1, gatewayPodName2} { + annotateMultusNetworkStatusInPodGateway(gwPod, servingNamespace, []string{addresses.gatewayIPs[i], addresses.gatewayIPs[i]}) + } + + createAPBExternalRouteCRWithDynamicHop(defaultPolicyName, f.Namespace.Name, servingNamespace, false, addressesv4.gatewayIPs) + + setupIperf3Client := func(container, address string, port int) { + // note iperf3 even when using udp also spawns tcp connection first; so we indirectly also have the tcp connection when using "-u" flag + cmd := []string{containerRuntime, "exec", container, "iperf3", "-u", "-c", address, "-p", fmt.Sprintf("%d", port), "-b", "1M", "-i", "1", "-t", "3", "&"} + klog.Infof("run command %+v", cmd) + _, err := runCommand(cmd...) 
+ framework.ExpectNoError(err, "failed to setup iperf3 client for %s", container) + } + macAddressGW := make([]string, 2) + for i, containerName := range []string{gwContainer1, gwContainer2} { + ginkgo.By("Start iperf3 client from external container to connect to iperf3 server running at the src pod") + setupIperf3Client(containerName, addresses.srcPodIP, 5201+i) + macAddressExtGW, err := net.ParseMAC(getMACAddressesForNetwork(containerName, externalContainerNetwork)) + framework.ExpectNoError(err, "failed to parse MAC address for %s", containerName) + // Trim leading 0s because conntrack dumped labels are just integers + // in hex without leading 0s. + macAddressGW[i] = strings.TrimLeft(strings.Replace(macAddressExtGW.String(), ":", "", -1), "0") + } + + ginkgo.By("Check if conntrack entries for ECMP routes are created for the 2 external gateways") + nodeName := getPod(f, srcPodName).Spec.NodeName + podConnEntriesWithMACLabelsSet := 2 // TCP + totalPodConnEntries := 6 // TCP + + gomega.Eventually(func() int { + n := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) + klog.Infof("Number of entries with macAddressGW %s:%d", macAddressGW, n) + return n + }, time.Minute, 5).Should(gomega.Equal(podConnEntriesWithMACLabelsSet)) + gomega.Expect(pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil)).To(gomega.Equal(totalPodConnEntries)) // total conntrack entries for this pod/protocol + + ginkgo.By("Remove second external gateway pod's routing-namespace annotation") + p := getGatewayPod(f, servingNamespace, gatewayPodName2) + p.Labels = map[string]string{"name": gatewayPodName2} + updatePod(f, p) + + ginkgo.By("Check if conntrack entries for ECMP routes are removed for the deleted external gateway if traffic is UDP") + if protocol == "udp" { + podConnEntriesWithMACLabelsSet = 1 + totalPodConnEntries = 5 + } + gomega.Eventually(func() int { + n := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) + klog.Infof("Number of entries with macAddressGW %s:%d", macAddressGW, n) + return n + }, 10).Should(gomega.Equal(podConnEntriesWithMACLabelsSet)) + gomega.Expect(pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil)).To(gomega.Equal(totalPodConnEntries)) + + ginkgo.By("Remove first external gateway pod's routing-namespace annotation") + p = getGatewayPod(f, servingNamespace, gatewayPodName1) + p.Labels = map[string]string{"name": gatewayPodName1} + updatePod(f, p) + + ginkgo.By("Check if conntrack entries for ECMP routes are removed for the deleted external gateway if traffic is UDP") + podConnEntriesWithMACLabelsSet = 2 // TCP + totalPodConnEntries = 6 // TCP + if protocol == "udp" { + podConnEntriesWithMACLabelsSet = 0 //we don't have any remaining gateways left + totalPodConnEntries = 4 + } + gomega.Eventually(func() int { + n := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) + klog.Infof("Number of entries with macAddressGW %s:%d", macAddressGW, n) + return n + }, 5).Should(gomega.Equal(podConnEntriesWithMACLabelsSet)) + gomega.Expect(pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil)).To(gomega.Equal(totalPodConnEntries)) + }, + ginkgotable.Entry("IPV4 udp", &addressesv4, "udp"), + ginkgotable.Entry("IPV4 tcp", &addressesv4, "tcp"), + ginkgotable.Entry("IPV6 udp", &addressesv6, "udp"), + ginkgotable.Entry("IPV6 tcp", &addressesv6, "tcp")) - // ensure the conntrack deletion tracker annotation is updated - ginkgo.By("Check if the k8s.ovn.org/external-gw-pod-ips got updated for the app 
namespace") - err := wait.PollImmediate(retryInterval, retryTimeout, func() (bool, error) { - ns := getNamespace(f, f.Namespace.Name) - return (ns.Annotations[externalGatewayPodIPsAnnotation] == fmt.Sprintf("%s,%s", addresses.gatewayIPs[0], addresses.gatewayIPs[1])), nil }) - framework.ExpectNoError(err, "Check if the k8s.ovn.org/external-gw-pod-ips got updated, failed: %v", err) - setupIperf3Client := func(container, address string, port int) { - // note iperf3 even when using udp also spawns tcp connection first; so we indirectly also have the tcp connection when using "-u" flag - cmd := []string{containerRuntime, "exec", container, "iperf3", "-u", "-c", address, "-p", fmt.Sprintf("%d", port), "-b", "1M", "-i", "1", "-t", "3", "&"} - _, err := runCommand(cmd...) - framework.ExpectNoError(err, "failed to setup iperf3 client for %s", container) - } - macAddressGW := make([]string, 2) - for i, containerName := range []string{gwContainer1, gwContainer2} { - ginkgo.By("Start iperf3 client from external container to connect to iperf3 server running at the src pod") - setupIperf3Client(containerName, addresses.srcPodIP, 5201+i) - macAddressExtGW, err := net.ParseMAC(getMACAddressesForNetwork(containerName, externalContainerNetwork)) - framework.ExpectNoError(err, "failed to parse MAC address for %s", containerName) - // Trim leading 0s because conntrack dumped labels are just integers - // in hex without leading 0s. - macAddressGW[i] = strings.TrimLeft(strings.Replace(macAddressExtGW.String(), ":", "", -1), "0") - } + // BFD Tests are dual of external gateway. The only difference is that they enable BFD on ovn and + // on the external containers, and after doing one round veryfing that the traffic reaches both containers, + // they delete one and verify that the traffic is always reaching the only alive container. 
+ var _ = ginkgo.Context("BFD", func() { + + var _ = ginkgo.Describe("e2e non-vxlan external gateway through a dynamic hop", func() { + const ( + svcname string = "externalgw-pod-novxlan" + gwContainer1 string = "ex-gw-container1" + gwContainer2 string = "ex-gw-container2" + srcPingPodName string = "e2e-exgw-src-ping-pod" + gatewayPodName1 string = "e2e-gateway-pod1" + gatewayPodName2 string = "e2e-gateway-pod2" + externalTCPPort = 91 + externalUDPPort = 90 + ecmpRetry int = 20 + testTimeout string = "20" + defaultPolicyName = "default-route-policy" + ) + + var ( + sleepCommand = []string{"bash", "-c", "sleep 20000"} + addressesv4, addressesv6 gatewayTestIPs + clientSet kubernetes.Interface + servingNamespace string + ) + + var ( + gwContainers []string + ) + + f := wrappedTestFramework(svcname) + + ginkgo.BeforeEach(func() { + clientSet = f.ClientSet // so it can be used in AfterEach + // retrieve worker node names + nodes, err := e2enode.GetBoundedReadySchedulableNodes(f.ClientSet, 3) + framework.ExpectNoError(err) + if len(nodes.Items) < 3 { + framework.Failf( + "Test requires >= 3 Ready nodes, but there are only %v nodes", + len(nodes.Items)) + } + + ns, err := f.CreateNamespace("exgw-bfd-serving", nil) + framework.ExpectNoError(err) + servingNamespace = ns.Name + + setupBFD := setupBFDOnContainer(nodes.Items) + gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPingPodName, externalUDPPort, externalTCPPort, ecmpRetry, setupBFD) + ginkgo.By("Create the external route policy with dynamic hops to manage the src app pod namespace") + + setupPolicyBasedGatewayPods(f, nodes, gatewayPodName1, gatewayPodName2, servingNamespace, sleepCommand, addressesv4, addressesv6) + }) + + ginkgo.AfterEach(func() { + deleteAPBExternalRouteCR(defaultPolicyName) + cleanExGWContainers(clientSet, []string{gwContainer1, gwContainer2}, addressesv4, addressesv6) + }) + + ginkgotable.DescribeTable("Should validate ICMP connectivity to an external gateway's loopback address via a pod with dynamic hop", + func(addresses *gatewayTestIPs, icmpCommand string) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + createAPBExternalRouteCRWithDynamicHop(defaultPolicyName, f.Namespace.Name, servingNamespace, true, addressesv4.gatewayIPs) + + ginkgo.By("Verifying connectivity to the pod from external gateways") + for _, gwContainer := range gwContainers { + _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) + framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + } + + // This is needed for bfd to sync up + for _, gwContainer := range gwContainers { + gomega.Eventually(func() bool { + return isBFDPaired(gwContainer, addresses.nodeIP) + }, time.Minute, 5).Should(gomega.BeTrue(), "Bfd not paired") + } + + tcpDumpSync := sync.WaitGroup{} + tcpDumpSync.Add(len(gwContainers)) + for _, gwContainer := range gwContainers { + go checkPingOnContainer(gwContainer, srcPingPodName, icmpCommand, &tcpDumpSync) + } + + // Verify the external gateway loopback address running on the external container is reachable and + // that traffic from the source ping pod is proxied through the pod in the serving namespace + ginkgo.By("Verifying connectivity via the gateway namespace to the remote addresses") + + pingSync := sync.WaitGroup{} + // spawn a goroutine to 
asynchronously (to speed up the test) + // to ping the gateway loopbacks on both containers via ECMP. + for _, address := range addresses.targetIPs { + pingSync.Add(1) + go func(target string) { + defer ginkgo.GinkgoRecover() + defer pingSync.Done() + _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c", testTimeout, target) + if err != nil { + framework.Logf("error generating a ping from the test pod %s: %v", srcPingPodName, err) + } + }(address) + } + + pingSync.Wait() + tcpDumpSync.Wait() + + if len(gwContainers) > 1 { + ginkgo.By("Deleting one container") + deleteClusterExternalContainer(gwContainers[1]) + time.Sleep(3 * time.Second) // bfd timeout + + tcpDumpSync = sync.WaitGroup{} + tcpDumpSync.Add(1) + go checkPingOnContainer(gwContainers[0], srcPingPodName, icmpCommand, &tcpDumpSync) + + // Verify the external gateway loopback address running on the external container is reachable and + // that traffic from the source ping pod is proxied through the pod in the serving namespace + ginkgo.By("Verifying connectivity via the gateway namespace to the remote addresses") + pingSync = sync.WaitGroup{} + + for _, t := range addresses.targetIPs { + pingSync.Add(1) + go func(target string) { + defer ginkgo.GinkgoRecover() + defer pingSync.Done() + _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c", testTimeout, target) + framework.ExpectNoError(err, "Failed to ping remote gateway %s from pod %s", target, srcPingPodName) + }(t) + } + pingSync.Wait() + tcpDumpSync.Wait() + } + }, + ginkgotable.Entry("ipv4", &addressesv4, "icmp"), + ginkgotable.Entry("ipv6", &addressesv6, "icmp6")) + + ginkgotable.DescribeTable("Should validate TCP/UDP connectivity to an external gateway's loopback address via a pod with a dynamic hop", + func(protocol string, addresses *gatewayTestIPs, destPort int) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + createAPBExternalRouteCRWithDynamicHop(defaultPolicyName, f.Namespace.Name, servingNamespace, true, addressesv4.gatewayIPs) + + for _, gwContainer := range gwContainers { + _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) + framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + } + + for _, gwContainer := range gwContainers { + gomega.Eventually(func() bool { + return isBFDPaired(gwContainer, addresses.nodeIP) + }, 10, 1).Should(gomega.BeTrue(), "Bfd not paired") + } + + expectedHostNames := hostNamesForContainers(gwContainers) + framework.Logf("Expected hostnames are %v", expectedHostNames) + + returnedHostNames := make(map[string]struct{}) + target := addresses.targetIPs[0] + success := false + for i := 0; i < 20; i++ { + hostname := pokeHostnameViaNC(srcPingPodName, f.Namespace.Name, protocol, target, destPort) + if hostname != "" { + returnedHostNames[hostname] = struct{}{} + } + + if cmp.Equal(returnedHostNames, expectedHostNames) { + success = true + break + } + } + framework.Logf("Received hostnames for protocol %s are %v ", protocol, returnedHostNames) - ginkgo.By("Check if conntrack entries for ECMP routes are created for the 2 external gateways") - nodeName := getPod(f, srcPodName).Spec.NodeName - podConnEntriesWithMACLabelsSet := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) - 
gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(2)) - totalPodConnEntries := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil) - gomega.Expect(totalPodConnEntries).To(gomega.Equal(6)) // total conntrack entries for this pod/protocol - - ginkgo.By("Remove second external gateway pod's routing-namespace annotation") - annotatePodForGateway(gatewayPodName2, servingNamespace, "", addresses.gatewayIPs[1], false) - - // ensure the conntrack deletion tracker annotation is updated - ginkgo.By("Check if the k8s.ovn.org/external-gw-pod-ips got updated for the app namespace") - err = wait.PollImmediate(retryInterval, retryTimeout, func() (bool, error) { - ns := getNamespace(f, f.Namespace.Name) - return (ns.Annotations[externalGatewayPodIPsAnnotation] == fmt.Sprintf("%s", addresses.gatewayIPs[0])), nil - }) - framework.ExpectNoError(err, "Check if the k8s.ovn.org/external-gw-pod-ips got updated, failed: %v", err) - - ginkgo.By("Check if conntrack entries for ECMP routes are removed for the deleted external gateway if traffic is UDP") - podConnEntriesWithMACLabelsSet = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) - totalPodConnEntries = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil) - if protocol == "udp" { - gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(1)) // we still have the conntrack entry for the remaining gateway - gomega.Expect(totalPodConnEntries).To(gomega.Equal(5)) // 6-1 - } else { - gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(2)) - gomega.Expect(totalPodConnEntries).To(gomega.Equal(6)) - } + if !success { + framework.Failf("Failed to hit all the external gateways via for protocol %s, diff %s", protocol, cmp.Diff(expectedHostNames, returnedHostNames)) + } - ginkgo.By("Remove first external gateway pod's routing-namespace annotation") - annotatePodForGateway(gatewayPodName1, servingNamespace, "", addresses.gatewayIPs[0], false) + if len(gwContainers) > 1 { + ginkgo.By("Deleting one container") + deleteClusterExternalContainer(gwContainers[1]) + ginkgo.By("Waiting for BFD to sync") + time.Sleep(3 * time.Second) // bfd timeout - // ensure the conntrack deletion tracker annotation is updated - ginkgo.By("Check if the k8s.ovn.org/external-gw-pod-ips got updated for the app namespace") - err = wait.PollImmediate(retryInterval, retryTimeout, func() (bool, error) { - ns := getNamespace(f, f.Namespace.Name) - return (ns.Annotations[externalGatewayPodIPsAnnotation] == ""), nil - }) - framework.ExpectNoError(err, "Check if the k8s.ovn.org/external-gw-pod-ips got updated, failed: %v", err) - - ginkgo.By("Check if conntrack entries for ECMP routes are removed for the deleted external gateway if traffic is UDP") - podConnEntriesWithMACLabelsSet = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) - totalPodConnEntries = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil) - if protocol == "udp" { - gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(0)) // we don't have any remaining gateways left - gomega.Expect(totalPodConnEntries).To(gomega.Equal(4)) // 6-2 - } else { - gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(2)) - gomega.Expect(totalPodConnEntries).To(gomega.Equal(6)) - } + // ECMP should direct all the traffic to the only container + expectedHostName := hostNameForContainer(gwContainers[0]) - }, - ginkgotable.Entry("IPV4 udp", &addressesv4, "udp"), - ginkgotable.Entry("IPV4 tcp", &addressesv4, "tcp"), - 
ginkgotable.Entry("IPV6 udp", &addressesv6, "udp"), - ginkgotable.Entry("IPV6 tcp", &addressesv6, "tcp")) -}) + ginkgo.By("Checking hostname multiple times") + for i := 0; i < 20; i++ { + hostname := pokeHostnameViaNC(srcPingPodName, f.Namespace.Name, protocol, target, destPort) + framework.ExpectEqual(expectedHostName, hostname, "Hostname returned by nc not as expected") + } + } + }, + ginkgotable.Entry("UDP ipv4", "udp", &addressesv4, externalUDPPort), + ginkgotable.Entry("TCP ipv4", "tcp", &addressesv4, externalTCPPort), + ginkgotable.Entry("UDP ipv6", "udp", &addressesv6, externalUDPPort), + ginkgotable.Entry("TCP ipv6", "tcp", &addressesv6, externalTCPPort)) + }) + + // Validate pods can reach a network running in multiple container's loopback + // addresses via two external gateways running on eth0 of the container without + // any tunnel encap. This test defines two external gateways and validates ECMP + // functionality to the container loopbacks. To verify traffic reaches the + // gateways, tcpdump is running on the external gateways and will exit successfully + // once an ICMP packet is received from the annotated pod in the k8s cluster. + // Two additional gateways are added to verify the tcp / udp protocols. + // They run the netexec command, and the pod asks to return their hostname. + // The test checks that both hostnames are collected at least once. + var _ = ginkgo.Describe("e2e multiple external gateway validation", func() { + const ( + svcname string = "novxlan-externalgw-ecmp" + gwContainer1 string = "gw-test-container1" + gwContainer2 string = "gw-test-container2" + testTimeout string = "30" + ecmpRetry int = 20 + srcPodName = "e2e-exgw-src-pod" + externalTCPPort = 80 + externalUDPPort = 90 + ) + + var ( + gwContainers []string + ) + + testContainer := fmt.Sprintf("%s-container", srcPodName) + testContainerFlag := fmt.Sprintf("--container=%s", testContainer) + + f := wrappedTestFramework(svcname) + + var addressesv4, addressesv6 gatewayTestIPs + + ginkgo.BeforeEach(func() { + nodes, err := e2enode.GetBoundedReadySchedulableNodes(f.ClientSet, 3) + framework.ExpectNoError(err) + if len(nodes.Items) < 3 { + framework.Failf( + "Test requires >= 3 Ready nodes, but there are only %v nodes", + len(nodes.Items)) + } -// BFD Tests are dual of external gateway. The only difference is that they enable BFD on ovn and -// on the external containers, and after doing one round veryfing that the traffic reaches both containers, -// they delete one and verify that the traffic is always reaching the only alive container. 
-var _ = ginkgo.Context("BFD", func() { - var _ = ginkgo.Describe("e2e non-vxlan external gateway through a gateway pod", func() { - const ( - svcname string = "externalgw-pod-novxlan" - gwContainer1 string = "ex-gw-container1" - gwContainer2 string = "ex-gw-container2" - srcPingPodName string = "e2e-exgw-src-ping-pod" - gatewayPodName1 string = "e2e-gateway-pod1" - gatewayPodName2 string = "e2e-gateway-pod2" - externalTCPPort = 91 - externalUDPPort = 90 - ecmpRetry int = 20 - testTimeout string = "20" - ) - - var ( - sleepCommand = []string{"bash", "-c", "sleep 20000"} - addressesv4, addressesv6 gatewayTestIPs - clientSet kubernetes.Interface - servingNamespace string - ) - - var ( - gwContainers []string - ) - - f := wrappedTestFramework(svcname) - - ginkgo.BeforeEach(func() { - clientSet = f.ClientSet // so it can be used in AfterEach - // retrieve worker node names - nodes, err := e2enode.GetBoundedReadySchedulableNodes(f.ClientSet, 3) - framework.ExpectNoError(err) - if len(nodes.Items) < 3 { - framework.Failf( - "Test requires >= 3 Ready nodes, but there are only %v nodes", - len(nodes.Items)) - } + if externalContainerNetwork == "host" { + skipper.Skipf("Skipping as host network doesn't support multiple external gateways") + } - ns, err := f.CreateNamespace("exgw-bfd-serving", nil) - framework.ExpectNoError(err) - servingNamespace = ns.Name + setupBFD := setupBFDOnContainer(nodes.Items) + gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPodName, externalUDPPort, externalTCPPort, ecmpRetry, setupBFD) - setupBFD := setupBFDOnContainer(nodes.Items) - gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPingPodName, externalUDPPort, externalTCPPort, ecmpRetry, setupBFD) - setupGatewayPods(f, nodes, gatewayPodName1, gatewayPodName2, servingNamespace, sleepCommand, addressesv4, addressesv6, true) - }) + }) - ginkgo.AfterEach(func() { - cleanExGWContainers(clientSet, []string{gwContainer1, gwContainer2}, addressesv4, addressesv6) - }) + ginkgo.AfterEach(func() { + deleteClusterExternalContainer(gwContainer1) + deleteClusterExternalContainer(gwContainer2) + deleteAPBExternalRouteCR(defaultPolicyName) + }) - ginkgotable.DescribeTable("Should validate ICMP connectivity to an external gateway's loopback address via a pod with external gateway annotations enabled", - func(addresses *gatewayTestIPs, icmpCommand string) { - if addresses.srcPodIP == "" || addresses.nodeIP == "" { - skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) - } + ginkgotable.DescribeTable("Should validate ICMP connectivity to multiple external gateways for an ECMP scenario", func(addresses *gatewayTestIPs, icmpToDump string) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + createAPBExternalRouteCRWithStaticHop(defaultPolicyName, f.Namespace.Name, true, addresses.gatewayIPs...) 
- ginkgo.By("Verifying connectivity to the pod from external gateways") - for _, gwContainer := range gwContainers { - _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) - framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) - } + for _, gwContainer := range gwContainers { + _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) + framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + } - // This is needed for bfd to sync up - time.Sleep(3 * time.Second) + for _, gwContainer := range gwContainers { + gomega.Eventually(func() bool { + return isBFDPaired(gwContainer, addresses.nodeIP) + }, 5).Should(gomega.BeTrue(), "Bfd not paired") + } - for _, gwContainer := range gwContainers { - framework.ExpectEqual(isBFDPaired(gwContainer, addresses.nodeIP), true, "Bfd not paired") - } + // Verify the gateways and remote loopback addresses are reachable from the pod. + // Iterate checking connectivity to the loopbacks on the gateways until tcpdump see + // the traffic or 20 attempts fail. Odds of a false negative here is ~ (1/2)^20 + ginkgo.By("Verifying ecmp connectivity to the external gateways by iterating through the targets") + + // Check for egress traffic to both gateway loopback addresses using tcpdump, since + // /proc/net/dev counters only record the ingress interface traffic is received on. + // The test will waits until an ICMP packet is matched on the gateways or fail the + // test if a packet to the loopback is not received within the timer interval. + // If an ICMP packet is never detected, return the error via the specified chanel. + + tcpDumpSync := sync.WaitGroup{} + tcpDumpSync.Add(len(gwContainers)) + for _, gwContainer := range gwContainers { + go checkPingOnContainer(gwContainer, srcPodName, icmpToDump, &tcpDumpSync) + } - tcpDumpSync := sync.WaitGroup{} - tcpDumpSync.Add(len(gwContainers)) - for _, gwContainer := range gwContainers { - go checkPingOnContainer(gwContainer, srcPingPodName, icmpCommand, &tcpDumpSync) - } + // spawn a goroutine to asynchronously (to speed up the test) + // to ping the gateway loopbacks on both containers via ECMP. - // Verify the external gateway loopback address running on the external container is reachable and - // that traffic from the source ping pod is proxied through the pod in the serving namespace - ginkgo.By("Verifying connectivity via the gateway namespace to the remote addresses") + pingSync := sync.WaitGroup{} - pingSync := sync.WaitGroup{} - // spawn a goroutine to asynchronously (to speed up the test) - // to ping the gateway loopbacks on both containers via ECMP. - for _, address := range addresses.targetIPs { - pingSync.Add(1) - go func(target string) { - defer ginkgo.GinkgoRecover() - defer pingSync.Done() - _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c", testTimeout, target) - if err != nil { - framework.Logf("error generating a ping from the test pod %s: %v", srcPingPodName, err) - } - }(address) - } + // spawn a goroutine to asynchronously (to speed up the test) + // to ping the gateway loopbacks on both containers via ECMP. 
+ for _, address := range addresses.targetIPs { + pingSync.Add(1) + go func(target string) { + defer ginkgo.GinkgoRecover() + defer pingSync.Done() + _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPodName, testContainerFlag, "--", "ping", "-c", testTimeout, target) + if err != nil { + framework.Logf("error generating a ping from the test pod %s: %v", srcPodName, err) + } + }(address) + } - pingSync.Wait() - tcpDumpSync.Wait() + pingSync.Wait() + tcpDumpSync.Wait() - if len(gwContainers) > 1 { ginkgo.By("Deleting one container") deleteClusterExternalContainer(gwContainers[1]) time.Sleep(3 * time.Second) // bfd timeout + pingSync = sync.WaitGroup{} tcpDumpSync = sync.WaitGroup{} - tcpDumpSync.Add(1) - go checkPingOnContainer(gwContainers[0], srcPingPodName, icmpCommand, &tcpDumpSync) - // Verify the external gateway loopback address running on the external container is reachable and - // that traffic from the source ping pod is proxied through the pod in the serving namespace - ginkgo.By("Verifying connectivity via the gateway namespace to the remote addresses") - pingSync = sync.WaitGroup{} + tcpDumpSync.Add(1) + go checkPingOnContainer(gwContainers[0], srcPodName, icmpToDump, &tcpDumpSync) - for _, t := range addresses.targetIPs { + // spawn a goroutine to asynchronously (to speed up the test) + // to ping the gateway loopbacks on both containers via ECMP. + for _, address := range addresses.targetIPs { pingSync.Add(1) go func(target string) { defer ginkgo.GinkgoRecover() defer pingSync.Done() - _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c", testTimeout, target) - framework.ExpectNoError(err, "Failed to ping remote gateway %s from pod %s", target, srcPingPodName) - }(t) + _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPodName, testContainerFlag, "--", "ping", "-c", testTimeout, target) + if err != nil { + framework.Logf("error generating a ping from the test pod %s: %v", srcPodName, err) + } + }(address) } + pingSync.Wait() tcpDumpSync.Wait() - } - }, - ginkgotable.Entry("ipv4", &addressesv4, "icmp"), - ginkgotable.Entry("ipv6", &addressesv6, "icmp6")) - ginkgotable.DescribeTable("Should validate TCP/UDP connectivity to an external gateway's loopback address via a pod with external gateway annotations enabled", - func(protocol string, addresses *gatewayTestIPs, destPort int) { - if addresses.srcPodIP == "" || addresses.nodeIP == "" { - skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) - } + }, ginkgotable.Entry("IPV4", &addressesv4, "icmp"), + ginkgotable.Entry("IPV6", &addressesv6, "icmp6")) - for _, gwContainer := range gwContainers { - _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) - framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) - } + // This test runs a listener on the external container, returning the host name both on tcp and udp. + // The src pod tries to hit the remote address until both the containers are hit. 
+ ginkgotable.DescribeTable("Should validate TCP/UDP connectivity to multiple external gateways for a UDP / TCP scenario", func(addresses *gatewayTestIPs, protocol string, destPort int) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + createAPBExternalRouteCRWithStaticHop(defaultPolicyName, f.Namespace.Name, true, addresses.gatewayIPs...) - for _, gwContainer := range gwContainers { - framework.ExpectEqual(isBFDPaired(gwContainer, addresses.nodeIP), true, "Bfd not paired") - } + for _, gwContainer := range gwContainers { + _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) + framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + } - expectedHostNames := hostNamesForContainers(gwContainers) - framework.Logf("Expected hostnames are %v", expectedHostNames) + // This is needed for bfd to sync up + time.Sleep(3 * time.Second) - returnedHostNames := make(map[string]struct{}) - target := addresses.targetIPs[0] - success := false - for i := 0; i < 20; i++ { - hostname := pokeHostnameViaNC(srcPingPodName, f.Namespace.Name, protocol, target, destPort) - if hostname != "" { - returnedHostNames[hostname] = struct{}{} + for _, gwContainer := range gwContainers { + framework.ExpectEqual(isBFDPaired(gwContainer, addresses.nodeIP), true, "Bfd not paired") } - if cmp.Equal(returnedHostNames, expectedHostNames) { - success = true - break + expectedHostNames := hostNamesForContainers(gwContainers) + framework.Logf("Expected hostnames are %v", expectedHostNames) + + returnedHostNames := make(map[string]struct{}) + success := false + + // Picking only the first address, the one the udp listener is set for + target := addresses.targetIPs[0] + for i := 0; i < 20; i++ { + hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, target, destPort) + if hostname != "" { + returnedHostNames[hostname] = struct{}{} + } + if cmp.Equal(returnedHostNames, expectedHostNames) { + success = true + break + } } - } - framework.Logf("Received hostnames for protocol %s are %v ", protocol, returnedHostNames) - if !success { - framework.Failf("Failed to hit all the external gateways via for protocol %s, diff %s", protocol, cmp.Diff(expectedHostNames, returnedHostNames)) - } + framework.Logf("Received hostnames for protocol %s are %v ", protocol, returnedHostNames) + + if !success { + framework.Failf("Failed to hit all the external gateways via for protocol %s, diff %s", protocol, cmp.Diff(expectedHostNames, returnedHostNames)) + } - if len(gwContainers) > 1 { ginkgo.By("Deleting one container") deleteClusterExternalContainer(gwContainers[1]) ginkgo.By("Waiting for BFD to sync") @@ -780,232 +1924,334 @@ var _ = ginkgo.Context("BFD", func() { ginkgo.By("Checking hostname multiple times") for i := 0; i < 20; i++ { - hostname := pokeHostnameViaNC(srcPingPodName, f.Namespace.Name, protocol, target, destPort) + hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, target, destPort) framework.ExpectEqual(expectedHostName, hostname, "Hostname returned by nc not as expected") } - } - }, - ginkgotable.Entry("UDP ipv4", "udp", &addressesv4, externalUDPPort), - ginkgotable.Entry("TCP ipv4", "tcp", &addressesv4, externalTCPPort), - ginkgotable.Entry("UDP ipv6", "udp", &addressesv6, externalUDPPort), - ginkgotable.Entry("TCP ipv6", "tcp", &addressesv6, externalTCPPort)) - }) - - 
// Validate pods can reach a network running in multiple container's loopback - // addresses via two external gateways running on eth0 of the container without - // any tunnel encap. This test defines two external gateways and validates ECMP - // functionality to the container loopbacks. To verify traffic reaches the - // gateways, tcpdump is running on the external gateways and will exit successfully - // once an ICMP packet is received from the annotated pod in the k8s cluster. - // Two additional gateways are added to verify the tcp / udp protocols. - // They run the netexec command, and the pod asks to return their hostname. - // The test checks that both hostnames are collected at least once. - var _ = ginkgo.Describe("e2e multiple external gateway validation", func() { - const ( - svcname string = "novxlan-externalgw-ecmp" - gwContainer1 string = "gw-test-container1" - gwContainer2 string = "gw-test-container2" - testTimeout string = "30" - ecmpRetry int = 20 - srcPodName = "e2e-exgw-src-pod" - externalTCPPort = 80 - externalUDPPort = 90 - ) - - var ( - gwContainers []string - ) - - testContainer := fmt.Sprintf("%s-container", srcPodName) - testContainerFlag := fmt.Sprintf("--container=%s", testContainer) - - f := wrappedTestFramework(svcname) - - var addressesv4, addressesv6 gatewayTestIPs - - ginkgo.BeforeEach(func() { - nodes, err := e2enode.GetBoundedReadySchedulableNodes(f.ClientSet, 3) - framework.ExpectNoError(err) - if len(nodes.Items) < 3 { - framework.Failf( - "Test requires >= 3 Ready nodes, but there are only %v nodes", - len(nodes.Items)) - } - - if externalContainerNetwork == "host" { - skipper.Skipf("Skipping as host network doesn't support multiple external gateways") - } - - setupBFD := setupBFDOnContainer(nodes.Items) - gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPodName, externalUDPPort, externalTCPPort, ecmpRetry, setupBFD) - - // remove the routing external annotation - annotateArgs := []string{ - "annotate", - "namespace", - f.Namespace.Name, - "k8s.ovn.org/routing-external-gws-", - } - ginkgo.By("Resetting the gw annotation") - framework.RunKubectlOrDie(f.Namespace.Name, annotateArgs...) - }) - - ginkgo.AfterEach(func() { - // tear down the containers simulating the gateways - deleteClusterExternalContainer(gwContainer1) - deleteClusterExternalContainer(gwContainer2) + }, ginkgotable.Entry("IPV4 udp", &addressesv4, "udp", externalUDPPort), + ginkgotable.Entry("IPV4 tcp", &addressesv4, "tcp", externalTCPPort), + ginkgotable.Entry("IPV6 udp", &addressesv6, "udp", externalUDPPort), + ginkgotable.Entry("IPV6 tcp", &addressesv6, "tcp", externalTCPPort)) + }) }) + }) - ginkgotable.DescribeTable("Should validate ICMP connectivity to multiple external gateways for an ECMP scenario", func(addresses *gatewayTestIPs, icmpToDump string) { - if addresses.srcPodIP == "" || addresses.nodeIP == "" { - skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) - } + var _ = ginkgo.Context("When migrating from Annotations to Admin Policy Based External Route CRs", func() { + // Validate pods can reach a network running in a container's looback address via + // an external gateway running on eth0 of the container without any tunnel encap. + // The traffic will get proxied through an annotated pod in the serving namespace. 
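+ // The tests below first create an AdminPolicyBasedExternalRoute CR that mirrors the annotations, then remove the annotations and verify that connectivity is still provided by the CR alone.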
+ var _ = ginkgo.Describe("e2e non-vxlan external gateway through a gateway pod", func() { + const ( + svcname string = "externalgw-pod-novxlan" + gwContainer1 string = "ex-gw-container1" + gwContainer2 string = "ex-gw-container2" + srcPingPodName string = "e2e-exgw-src-ping-pod" + gatewayPodName1 string = "e2e-gateway-pod1" + gatewayPodName2 string = "e2e-gateway-pod2" + externalTCPPort = 91 + externalUDPPort = 90 + ecmpRetry int = 20 + testTimeout string = "20" + ) + + var ( + sleepCommand = []string{"bash", "-c", "sleep 20000"} + addressesv4, addressesv6 gatewayTestIPs + clientSet kubernetes.Interface + servingNamespace string + ) + + var ( + gwContainers []string + ) + + f := wrappedTestFramework(svcname) + + ginkgo.BeforeEach(func() { + clientSet = f.ClientSet // so it can be used in AfterEach + // retrieve worker node names + nodes, err := e2enode.GetBoundedReadySchedulableNodes(f.ClientSet, 3) + framework.ExpectNoError(err) + if len(nodes.Items) < 3 { + framework.Failf( + "Test requires >= 3 Ready nodes, but there are only %v nodes", + len(nodes.Items)) + } - annotateNamespaceForGateway(f.Namespace.Name, true, addresses.gatewayIPs[:]...) - for _, gwContainer := range gwContainers { - _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) - framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) - } + ns, err := f.CreateNamespace("exgw-serving", nil) + framework.ExpectNoError(err) + servingNamespace = ns.Name - // This is needed for bfd to sync up - time.Sleep(3 * time.Second) + gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPingPodName, externalUDPPort, externalTCPPort, ecmpRetry) + setupAnnotatedGatewayPods(f, nodes, gatewayPodName1, gatewayPodName2, servingNamespace, sleepCommand, addressesv4, addressesv6, false) + }) - for _, gwContainer := range gwContainers { - framework.ExpectEqual(isBFDPaired(gwContainer, addresses.nodeIP), true, "Bfd not paired") - } + ginkgo.AfterEach(func() { + cleanExGWContainers(clientSet, []string{gwContainer1, gwContainer2}, addressesv4, addressesv6) + deleteAPBExternalRouteCR(defaultPolicyName) + resetGatewayAnnotations(f) + }) - // Verify the gateways and remote loopback addresses are reachable from the pod. - // Iterate checking connectivity to the loopbacks on the gateways until tcpdump see - // the traffic or 20 attempts fail. Odds of a false negative here is ~ (1/2)^20 - ginkgo.By("Verifying ecmp connectivity to the external gateways by iterating through the targets") - - // Check for egress traffic to both gateway loopback addresses using tcpdump, since - // /proc/net/dev counters only record the ingress interface traffic is received on. - // The test will waits until an ICMP packet is matched on the gateways or fail the - // test if a packet to the loopback is not received within the timer interval. - // If an ICMP packet is never detected, return the error via the specified chanel. 
- - tcpDumpSync := sync.WaitGroup{} - tcpDumpSync.Add(len(gwContainers)) - for _, gwContainer := range gwContainers { - go checkPingOnContainer(gwContainer, srcPodName, icmpToDump, &tcpDumpSync) - } + ginkgotable.DescribeTable("Should validate ICMP connectivity to an external gateway's loopback address via a pod with external gateway annotations and a policy CR and after the annotations are removed", + func(addresses *gatewayTestIPs, icmpCommand string) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } - // spawn a goroutine to asynchronously (to speed up the test) - // to ping the gateway loopbacks on both containers via ECMP. + createAPBExternalRouteCRWithDynamicHop(defaultPolicyName, f.Namespace.Name, servingNamespace, false, addressesv4.gatewayIPs) + ginkgo.By("Remove gateway annotations in pods") + annotatePodForGateway(gatewayPodName2, servingNamespace, "", addresses.gatewayIPs[1], false) + annotatePodForGateway(gatewayPodName1, servingNamespace, "", addresses.gatewayIPs[0], false) + ginkgo.By("Validate ICMP connectivity again with only CR policy to support it") + ginkgo.By(fmt.Sprintf("Verifying connectivity to the pod [%s] from external gateways", addresses.srcPodIP)) + for _, gwContainer := range gwContainers { + _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) + framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + } + tcpDumpSync := sync.WaitGroup{} + tcpDumpSync.Add(len(gwContainers)) - pingSync := sync.WaitGroup{} + for _, gwContainer := range gwContainers { + go checkPingOnContainer(gwContainer, srcPingPodName, icmpCommand, &tcpDumpSync) + } - // spawn a goroutine to asynchronously (to speed up the test) - // to ping the gateway loopbacks on both containers via ECMP. 
- for _, address := range addresses.targetIPs { - pingSync.Add(1) - go func(target string) { - defer ginkgo.GinkgoRecover() - defer pingSync.Done() - _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPodName, testContainerFlag, "--", "ping", "-c", testTimeout, target) - if err != nil { - framework.Logf("error generating a ping from the test pod %s: %v", srcPodName, err) + // Verify the external gateway loopback address running on the external container is reachable and + // that traffic from the source ping pod is proxied through the pod in the serving namespace + ginkgo.By("Verifying connectivity via the gateway namespace to the remote addresses") + pingSync := sync.WaitGroup{} + for _, t := range addresses.targetIPs { + pingSync.Add(1) + go func(target string) { + defer ginkgo.GinkgoRecover() + defer pingSync.Done() + _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c", testTimeout, target) + framework.ExpectNoError(err, "Failed to ping remote gateway %s from pod %s", target, srcPingPodName) + }(t) } - }(address) - } + pingSync.Wait() + tcpDumpSync.Wait() + }, + ginkgotable.Entry("ipv4", &addressesv4, "icmp")) - pingSync.Wait() - tcpDumpSync.Wait() + ginkgotable.DescribeTable("Should validate TCP/UDP connectivity to an external gateway's loopback address via a pod when deleting the annotation and supported by a CR with the same gateway IPs", + func(protocol string, addresses *gatewayTestIPs, destPort, destPortOnPod int) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + createAPBExternalRouteCRWithDynamicHop(defaultPolicyName, f.Namespace.Name, servingNamespace, false, addressesv4.gatewayIPs) + ginkgo.By("removing the annotations in the pod gateways") + annotatePodForGateway(gatewayPodName2, servingNamespace, "", addresses.gatewayIPs[1], false) + annotatePodForGateway(gatewayPodName1, servingNamespace, "", addresses.gatewayIPs[0], false) - ginkgo.By("Deleting one container") - deleteClusterExternalContainer(gwContainers[1]) - time.Sleep(3 * time.Second) // bfd timeout + for _, container := range gwContainers { + reachPodFromContainer(addresses.srcPodIP, strconv.Itoa(destPortOnPod), srcPingPodName, container, protocol) + } - pingSync = sync.WaitGroup{} - tcpDumpSync = sync.WaitGroup{} + expectedHostNames := make(map[string]struct{}) + for _, c := range gwContainers { + res, err := runCommand(containerRuntime, "exec", c, "hostname") + framework.ExpectNoError(err, "failed to run hostname in %s", c) + hostname := strings.TrimSuffix(res, "\n") + framework.Logf("Hostname for %s is %s", c, hostname) + expectedHostNames[hostname] = struct{}{} + } + framework.Logf("Expected hostnames are %v", expectedHostNames) - tcpDumpSync.Add(1) - go checkPingOnContainer(gwContainers[0], srcPodName, icmpToDump, &tcpDumpSync) + ginkgo.By("Checking that external ips are reachable with both gateways") + returnedHostNames := make(map[string]struct{}) + target := addresses.targetIPs[0] + success := false + for i := 0; i < 20; i++ { + args := []string{"exec", srcPingPodName, "--"} + if protocol == "tcp" { + args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 %s %d", target, destPort)) + } else { + args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 -u %s %d", target, destPort)) + } + res, err := framework.RunKubectl(f.Namespace.Name, args...) 
+ framework.ExpectNoError(err, "failed to reach %s (%s)", target, protocol) + hostname := strings.TrimSuffix(res, "\n") + if hostname != "" { + returnedHostNames[hostname] = struct{}{} + } - // spawn a goroutine to asynchronously (to speed up the test) - // to ping the gateway loopbacks on both containers via ECMP. - for _, address := range addresses.targetIPs { - pingSync.Add(1) - go func(target string) { - defer ginkgo.GinkgoRecover() - defer pingSync.Done() - _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPodName, testContainerFlag, "--", "ping", "-c", testTimeout, target) - if err != nil { - framework.Logf("error generating a ping from the test pod %s: %v", srcPodName, err) + if cmp.Equal(returnedHostNames, expectedHostNames) { + success = true + break + } } - }(address) - } + framework.Logf("Received hostnames for protocol %s are %v ", protocol, returnedHostNames) - pingSync.Wait() - tcpDumpSync.Wait() + if !success { + framework.Failf("Failed to hit all the external gateways via for protocol %s, diff %s", protocol, cmp.Diff(expectedHostNames, returnedHostNames)) + } - }, ginkgotable.Entry("IPV4", &addressesv4, "icmp"), - ginkgotable.Entry("IPV6", &addressesv6, "icmp6")) + }, + ginkgotable.Entry("UDP ipv4", "udp", &addressesv4, externalUDPPort, srcUDPPort), + ginkgotable.Entry("TCP ipv4", "tcp", &addressesv4, externalTCPPort, srcHTTPPort), + ginkgotable.Entry("UDP ipv6", "udp", &addressesv6, externalUDPPort, srcUDPPort), + ginkgotable.Entry("TCP ipv6", "tcp", &addressesv6, externalTCPPort, srcHTTPPort)) + }) - // This test runs a listener on the external container, returning the host name both on tcp and udp. - // The src pod tries to hit the remote address until both the containers are hit. - ginkgotable.DescribeTable("Should validate TCP/UDP connectivity to multiple external gateways for a UDP / TCP scenario", func(addresses *gatewayTestIPs, protocol string, destPort int) { - if addresses.srcPodIP == "" || addresses.nodeIP == "" { - skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) - } + var _ = ginkgo.Describe("e2e multiple external gateway stale conntrack entry deletion validation", func() { + const ( + svcname string = "novxlan-externalgw-ecmp" + gwContainer1 string = "gw-test-container1" + gwContainer2 string = "gw-test-container2" + srcPodName string = "e2e-exgw-src-pod" + gatewayPodName1 string = "e2e-gateway-pod1" + gatewayPodName2 string = "e2e-gateway-pod2" + ) + + var ( + servingNamespace string + ) + + f := wrappedTestFramework(svcname) + + var ( + addressesv4, addressesv6 gatewayTestIPs + sleepCommand []string + nodes *v1.NodeList + err error + clientSet kubernetes.Interface + ) + + ginkgo.BeforeEach(func() { + clientSet = f.ClientSet // so it can be used in AfterEach + // retrieve worker node names + nodes, err = e2enode.GetBoundedReadySchedulableNodes(clientSet, 3) + framework.ExpectNoError(err) + if len(nodes.Items) < 3 { + framework.Failf( + "Test requires >= 3 Ready nodes, but there are only %v nodes", + len(nodes.Items)) + } - annotateNamespaceForGateway(f.Namespace.Name, true, addresses.gatewayIPs[:]...) 
+ if externalContainerNetwork == "host" { + skipper.Skipf("Skipping as host network doesn't support multiple external gateways") + } - for _, gwContainer := range gwContainers { - _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) - framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) - } + ns, err := f.CreateNamespace("exgw-conntrack-serving", nil) + framework.ExpectNoError(err) + servingNamespace = ns.Name + + addressesv4, addressesv6 = setupGatewayContainersForConntrackTest(f, nodes, gwContainer1, gwContainer2, srcPodName) + sleepCommand = []string{"bash", "-c", "sleep 20000"} + _, err = createGenericPodWithLabel(f, gatewayPodName1, nodes.Items[0].Name, servingNamespace, sleepCommand, map[string]string{"gatewayPod": "true"}) + framework.ExpectNoError(err, "Create and annotate the external gw pods to manage the src app pod namespace, failed: %v", err) + _, err = createGenericPodWithLabel(f, gatewayPodName2, nodes.Items[1].Name, servingNamespace, sleepCommand, map[string]string{"gatewayPod": "true"}) + framework.ExpectNoError(err, "Create and annotate the external gw pods to manage the src app pod namespace, failed: %v", err) + }) + + ginkgo.AfterEach(func() { + // tear down the containers and pods simulating the gateways + ginkgo.By("Deleting the gateway containers") + deleteClusterExternalContainer(gwContainer1) + deleteClusterExternalContainer(gwContainer2) + deleteAPBExternalRouteCR(defaultPolicyName) + resetGatewayAnnotations(f) + }) + + ginkgotable.DescribeTable("Namespace annotation: Should validate conntrack entry remains unchanged when deleting the annotation in the namespace while the CR static hop still references the same namespace in the policy", func(addresses *gatewayTestIPs, protocol string) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + ginkgo.By("Annotate the app namespace to get managed by external gateways") + annotateNamespaceForGateway(f.Namespace.Name, false, addresses.gatewayIPs...) + createAPBExternalRouteCRWithStaticHop(defaultPolicyName, f.Namespace.Name, false, addresses.gatewayIPs...) + + setupIperf3Client := func(container, address string, port int) { + // note iperf3 even when using udp also spawns tcp connection first; so we indirectly also have the tcp connection when using "-u" flag + cmd := []string{containerRuntime, "exec", container, "iperf3", "-u", "-c", address, "-p", fmt.Sprintf("%d", port), "-b", "1M", "-i", "1", "-t", "3", "&"} + _, err := runCommand(cmd...) + framework.ExpectNoError(err, "failed to setup iperf3 client for %s", container) + } + macAddressGW := make([]string, 2) + for i, containerName := range []string{gwContainer1, gwContainer2} { + ginkgo.By("Start iperf3 client from external container to connect to iperf3 server running at the src pod") + setupIperf3Client(containerName, addresses.srcPodIP, 5201+i) + macAddressExtGW, err := net.ParseMAC(getMACAddressesForNetwork(containerName, externalContainerNetwork)) + framework.ExpectNoError(err, "failed to parse MAC address for %s", containerName) + // Trim leading 0s because conntrack dumped labels are just integers + // in hex without leading 0s. 
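+ // The trimmed MACs are later matched against the conntrack entry labels to attribute each entry to its gateway.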
+ macAddressGW[i] = strings.TrimLeft(strings.Replace(macAddressExtGW.String(), ":", "", -1), "0") + } + ginkgo.By("Removing the namespace annotations to leave only the CR policy active") + annotateNamespaceForGateway(f.Namespace.Name, false, "") - // This is needed for bfd to sync up - time.Sleep(3 * time.Second) + ginkgo.By("Check if conntrack entries for ECMP routes are created for the 2 external gateways") + nodeName := getPod(f, srcPodName).Spec.NodeName + podConnEntriesWithMACLabelsSet := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) + gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(2)) + totalPodConnEntries := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil) + gomega.Expect(totalPodConnEntries).To(gomega.Equal(6)) // total conntrack entries for this pod/protocol - for _, gwContainer := range gwContainers { - framework.ExpectEqual(isBFDPaired(gwContainer, addresses.nodeIP), true, "Bfd not paired") - } + ginkgo.By("Check if conntrack entries for ECMP routes are removed for the deleted external gateway if traffic is UDP") + podConnEntriesWithMACLabelsSet = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) + totalPodConnEntries = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil) - expectedHostNames := hostNamesForContainers(gwContainers) - framework.Logf("Expected hostnames are %v", expectedHostNames) + gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(2)) + gomega.Expect(totalPodConnEntries).To(gomega.Equal(6)) - returnedHostNames := make(map[string]struct{}) - success := false + }, + ginkgotable.Entry("IPV4 udp", &addressesv4, "udp"), + ginkgotable.Entry("IPV4 tcp", &addressesv4, "tcp"), + ginkgotable.Entry("IPV6 udp", &addressesv6, "udp"), + ginkgotable.Entry("IPV6 tcp", &addressesv6, "tcp")) - // Picking only the first address, the one the udp listener is set for - target := addresses.targetIPs[0] - for i := 0; i < 20; i++ { - hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, target, destPort) - if hostname != "" { - returnedHostNames[hostname] = struct{}{} + ginkgotable.DescribeTable("ExternalGWPod annotation: Should validate conntrack entry remains unchanged when deleting the annotation in the pods while the CR dynamic hop still references the same pods with the pod selector", func(addresses *gatewayTestIPs, protocol string) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) } - if cmp.Equal(returnedHostNames, expectedHostNames) { - success = true - break + ginkgo.By("Annotate the external gw pods to manage the src app pod namespace") + for i, gwPod := range []string{gatewayPodName1, gatewayPodName2} { + networkIPs := fmt.Sprintf("\"%s\"", addresses.gatewayIPs[i]) + if addresses.srcPodIP != "" && addresses.nodeIP != "" { + networkIPs = fmt.Sprintf("\"%s\", \"%s\"", addresses.gatewayIPs[i], addresses.gatewayIPs[i]) + } + annotatePodForGateway(gwPod, servingNamespace, f.Namespace.Name, networkIPs, false) + } + createAPBExternalRouteCRWithDynamicHop(defaultPolicyName, f.Namespace.Name, servingNamespace, false, addressesv4.gatewayIPs) + // ensure the conntrack deletion tracker annotation is updated + ginkgo.By("Check if the k8s.ovn.org/external-gw-pod-ips got updated for the app namespace") + err := wait.PollImmediate(retryInterval, retryTimeout, func() (bool, error) { + ns := getNamespace(f, f.Namespace.Name) + return 
(ns.Annotations[externalGatewayPodIPsAnnotation] == fmt.Sprintf("%s,%s", addresses.gatewayIPs[0], addresses.gatewayIPs[1])), nil + }) + framework.ExpectNoError(err, "Check if the k8s.ovn.org/external-gw-pod-ips got updated, failed: %v", err) + annotatePodForGateway(gatewayPodName2, servingNamespace, "", addresses.gatewayIPs[1], false) + annotatePodForGateway(gatewayPodName1, servingNamespace, "", addresses.gatewayIPs[0], false) + + setupIperf3Client := func(container, address string, port int) { + // note iperf3 even when using udp also spawns tcp connection first; so we indirectly also have the tcp connection when using "-u" flag + cmd := []string{containerRuntime, "exec", container, "iperf3", "-u", "-c", address, "-p", fmt.Sprintf("%d", port), "-b", "1M", "-i", "1", "-t", "3", "&"} + _, err := runCommand(cmd...) + framework.ExpectNoError(err, "failed to setup iperf3 client for %s", container) + } + macAddressGW := make([]string, 2) + for i, containerName := range []string{gwContainer1, gwContainer2} { + ginkgo.By("Start iperf3 client from external container to connect to iperf3 server running at the src pod") + setupIperf3Client(containerName, addresses.srcPodIP, 5201+i) + macAddressExtGW, err := net.ParseMAC(getMACAddressesForNetwork(containerName, externalContainerNetwork)) + framework.ExpectNoError(err, "failed to parse MAC address for %s", containerName) + // Trim leading 0s because conntrack dumped labels are just integers + // in hex without leading 0s. + macAddressGW[i] = strings.TrimLeft(strings.Replace(macAddressExtGW.String(), ":", "", -1), "0") } - } - - framework.Logf("Received hostnames for protocol %s are %v ", protocol, returnedHostNames) - - if !success { - framework.Failf("Failed to hit all the external gateways via for protocol %s, diff %s", protocol, cmp.Diff(expectedHostNames, returnedHostNames)) - } - - ginkgo.By("Deleting one container") - deleteClusterExternalContainer(gwContainers[1]) - ginkgo.By("Waiting for BFD to sync") - time.Sleep(3 * time.Second) // bfd timeout - // ECMP should direct all the traffic to the only container - expectedHostName := hostNameForContainer(gwContainers[0]) + ginkgo.By("Check if conntrack entries for ECMP routes are created for the 2 external gateways") + nodeName := getPod(f, srcPodName).Spec.NodeName + podConnEntriesWithMACLabelsSet := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) + gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(2)) + totalPodConnEntries := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil) + gomega.Expect(totalPodConnEntries).To(gomega.Equal(6)) // total conntrack entries for this pod/protocol + }, + ginkgotable.Entry("IPV4 udp", &addressesv4, "udp"), + ginkgotable.Entry("IPV4 tcp", &addressesv4, "tcp"), + ginkgotable.Entry("IPV6 udp", &addressesv6, "udp"), + ginkgotable.Entry("IPV6 tcp", &addressesv6, "tcp")) + }) - ginkgo.By("Checking hostname multiple times") - for i := 0; i < 20; i++ { - hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, target, destPort) - framework.ExpectEqual(expectedHostName, hostname, "Hostname returned by nc not as expected") - } - }, ginkgotable.Entry("IPV4 udp", &addressesv4, "udp", externalUDPPort), - ginkgotable.Entry("IPV4 tcp", &addressesv4, "tcp", externalTCPPort), - ginkgotable.Entry("IPV6 udp", &addressesv6, "udp", externalUDPPort), - ginkgotable.Entry("IPV6 tcp", &addressesv6, "tcp", externalTCPPort)) }) }) @@ -1148,14 +2394,14 @@ func setupGatewayContainers(f *framework.Framework, nodes *v1.NodeList, 
containe return gwContainers, addressesv4, addressesv6 } -func setupGatewayPods(f *framework.Framework, nodes *v1.NodeList, pod1, pod2, ns string, cmd []string, addressesv4, addressesv6 gatewayTestIPs, bfd bool) []string { +func setupAnnotatedGatewayPods(f *framework.Framework, nodes *v1.NodeList, pod1, pod2, ns string, cmd []string, addressesv4, addressesv6 gatewayTestIPs, bfd bool) []string { gwPods := []string{pod1, pod2} if externalContainerNetwork == "host" { gwPods = []string{pod1} } for i, gwPod := range gwPods { - _, err := createGenericPod(f, gwPod, nodes.Items[i].Name, ns, cmd) + _, err := createGenericPodWithLabel(f, gwPod, nodes.Items[i].Name, ns, cmd, map[string]string{"gatewayPod": "true"}) framework.ExpectNoError(err) } @@ -1170,6 +2416,24 @@ func setupGatewayPods(f *framework.Framework, nodes *v1.NodeList, pod1, pod2, ns return gwPods } +func setupPolicyBasedGatewayPods(f *framework.Framework, nodes *v1.NodeList, pod1, pod2, ns string, cmd []string, addressesv4, addressesv6 gatewayTestIPs) []string { + gwPods := []string{pod1, pod2} + if externalContainerNetwork == "host" { + gwPods = []string{pod1} + } + + for i, gwPod := range gwPods { + _, err := createGenericPodWithLabel(f, gwPod, nodes.Items[i].Name, ns, cmd, map[string]string{"gatewayPod": "true"}) + framework.ExpectNoError(err) + } + + for i, gwPod := range gwPods { + annotateMultusNetworkStatusInPodGateway(gwPod, ns, []string{addressesv4.gatewayIPs[i]}) + } + + return gwPods +} + func cleanExGWContainers(clientSet kubernetes.Interface, gwContainers []string, addressesv4, addressesv6 gatewayTestIPs) { ginkgo.By("Deleting the gateway containers") if externalContainerNetwork == "host" { @@ -1185,23 +2449,26 @@ func cleanExGWContainers(clientSet kubernetes.Interface, gwContainers []string, // setupGatewayContainersForConntrackTest sets up iperf3 external containers, adds routes to src // pods via the nodes, starts up iperf3 server on src-pod func setupGatewayContainersForConntrackTest(f *framework.Framework, nodes *v1.NodeList, gwContainer1, gwContainer2, srcPodName string) (gatewayTestIPs, gatewayTestIPs) { + var ( + err error + clientPod *v1.Pod + ) addressesv4 := gatewayTestIPs{gatewayIPs: make([]string, 2)} addressesv6 := gatewayTestIPs{gatewayIPs: make([]string, 2)} - ginkgo.By("Creating the gateway containers for the UDP test") addressesv4.gatewayIPs[0], addressesv6.gatewayIPs[0] = createClusterExternalContainer(gwContainer1, iperf3Image, []string{"-itd", "--privileged", "--network", externalContainerNetwork}, []string{}) addressesv4.gatewayIPs[1], addressesv6.gatewayIPs[1] = createClusterExternalContainer(gwContainer2, iperf3Image, []string{"-itd", "--privileged", "--network", externalContainerNetwork}, []string{}) node := nodes.Items[0] - addressesv4.nodeIP, addressesv6.nodeIP = getContainerAddressesForNetwork(node.Name, externalContainerNetwork) - framework.Logf("the pod side node is %s and the source node ip is %s - %s", node.Name, addressesv4.nodeIP, addressesv6.nodeIP) - ginkgo.By("Creating the source pod to reach the destination ips from") - clientPod, err := createPod(f, srcPodName, node.Name, f.Namespace.Name, []string{}, map[string]string{}, func(p *v1.Pod) { + clientPod, err = createPod(f, srcPodName, node.Name, f.Namespace.Name, []string{}, map[string]string{}, func(p *v1.Pod) { p.Spec.Containers[0].Image = iperf3Image }) framework.ExpectNoError(err) + addressesv4.nodeIP, addressesv6.nodeIP = getContainerAddressesForNetwork(node.Name, externalContainerNetwork) + framework.Logf("the pod side node is %s 
and the source node ip is %s - %s", node.Name, addressesv4.nodeIP, addressesv6.nodeIP) + // start iperf3 servers at ports 5201 and 5202 on the src app pod args := []string{"exec", srcPodName, "--", "iperf3", "-s", "--daemon", "-V", fmt.Sprintf("-p %d", 5201)} _, err = framework.RunKubectl(f.Namespace.Name, args...) @@ -1265,20 +2532,40 @@ func annotatePodForGateway(podName, podNS, namespace, networkIPs string, bfd boo // this fakes out the multus annotation so that the pod IP is // actually an IP of an external container for testing purposes annotateArgs := []string{ - "annotate", - "pods", - "--overwrite", - podName, fmt.Sprintf("k8s.v1.cni.cncf.io/network-status=[{\"name\":\"%s\",\"interface\":"+ "\"net1\",\"ips\":[%s],\"mac\":\"%s\"}]", "foo", networkIPs, "01:23:45:67:89:10"), fmt.Sprintf("k8s.ovn.org/routing-namespaces=%s", namespace), fmt.Sprintf("k8s.ovn.org/routing-network=%s", "foo"), - "--overwrite", } if bfd { annotateArgs = append(annotateArgs, "k8s.ovn.org/bfd-enabled=\"\"") } - framework.Logf("Annotating the external gateway pod with annotation %s", annotateArgs) + annotatePodForGatewayWithAnnotations(podName, podNS, annotateArgs) +} + +func annotateMultusNetworkStatusInPodGateway(podName, podNS string, networkIPs []string) { + // add the annotations to the pod to enable the gateway forwarding. + // this fakes out the multus annotation so that the pod IP is + // actually an IP of an external container for testing purposes + nStatus := []nettypes.NetworkStatus{{Name: "foo", Interface: "net1", IPs: networkIPs, Mac: "01:23:45:67:89:10"}} + out, err := json.Marshal(nStatus) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + annotatePodForGatewayWithAnnotations(podName, podNS, []string{fmt.Sprintf("k8s.v1.cni.cncf.io/network-status=%s", string(out))}) +} + +func annotatePodForGatewayWithAnnotations(podName, podNS string, annotations []string) { + // add the annotations to the pod to enable the gateway forwarding. + // this fakes out the multus annotation so that the pod IP is + // actually an IP of an external container for testing purposes + annotateArgs := []string{ + "annotate", + "pods", + podName, + "--overwrite", + } + annotateArgs = append(annotateArgs, annotations...) + framework.Logf("Annotating the external gateway pod with annotation '%s'", annotateArgs) framework.RunKubectlOrDie(podNS, annotateArgs...) } @@ -1300,6 +2587,160 @@ func annotateNamespaceForGateway(namespace string, bfd bool, gateways ...string) framework.RunKubectlOrDie(namespace, annotateArgs...) } +func removeStaticGatewayAnnotationInNamespace(namespace string) { + + // annotate the test namespace with multiple gateways defined + annotateArgs := []string{ + "annotate", + "namespace", + namespace, + "k8s.ovn.org/routing-external-gws-", + "--overwrite", + } + framework.RunKubectlOrDie(namespace, annotateArgs...) 
+} + +func createAPBExternalRouteCRWithDynamicHop(policyName, targetNamespace, servingNamespace string, bfd bool, gateways []string) { + data := fmt.Sprintf(`apiVersion: k8s.ovn.org/v1 +kind: AdminPolicyBasedExternalRoute +metadata: + name: %s +spec: + from: + namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: %s + nextHops: + dynamic: +%s +`, policyName, targetNamespace, formatDynamicHops(bfd, servingNamespace)) + stdout, err := framework.RunKubectlInput("", data, "create", "-f", "-") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(stdout).To(gomega.Equal(fmt.Sprintf("adminpolicybasedexternalroute.k8s.ovn.org/%s created\n", policyName))) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gwIPs := sets.NewString(gateways...).List() + gomega.Eventually(func() string { + lastMsg, err := framework.RunKubectl("", "get", "apbexternalroute", policyName, "-ojsonpath={.status.messages[-1:]}") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return lastMsg + }, time.Minute, 1).Should(gomega.Equal(fmt.Sprintf("Configured external gateway IPs: %s", strings.Join(gwIPs, ",")))) + gomega.Eventually(func() string { + status, err := framework.RunKubectl("", "get", "apbexternalroute", policyName, "-ojsonpath={.status.status}") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return status + }, time.Minute, 1).Should(gomega.Equal("Success")) +} +func createAPBExternalRouteCRWithStaticHop(policyName, namespaceName string, bfd bool, gateways ...string) { + + data := fmt.Sprintf(`apiVersion: k8s.ovn.org/v1 +kind: AdminPolicyBasedExternalRoute +metadata: + name: %s +spec: + from: + namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: %s + nextHops: + static: +%s +`, policyName, namespaceName, formatStaticHops(bfd, gateways...)) + stdout, err := framework.RunKubectlInput("", data, "create", "-f", "-", "--save-config") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(stdout).To(gomega.Equal(fmt.Sprintf("adminpolicybasedexternalroute.k8s.ovn.org/%s created\n", policyName))) + gwIPs := sets.NewString(gateways...).List() + gomega.Eventually(func() string { + lastMsg, err := framework.RunKubectl("", "get", "apbexternalroute", policyName, "-ojsonpath={.status.messages[-1:]}") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return lastMsg + }, time.Minute, 1).Should(gomega.Equal(fmt.Sprintf("Configured external gateway IPs: %s", strings.Join(gwIPs, ",")))) + gomega.Eventually(func() string { + status, err := framework.RunKubectl("", "get", "apbexternalroute", policyName, "-ojsonpath={.status.status}") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return status + }, time.Minute, 1).Should(gomega.Equal("Success")) +} + +func updateAPBExternalRouteCRWithStaticHop(policyName, namespaceName string, bfd bool, gateways ...string) { + + lastUpdatetime, err := framework.RunKubectl("", "get", "apbexternalroute", policyName, "-ojsonpath={.status.lastTransitionTime}") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + data := fmt.Sprintf(`apiVersion: k8s.ovn.org/v1 +kind: AdminPolicyBasedExternalRoute +metadata: + name: %s +spec: + from: + namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: %s + nextHops: + static: +%s +`, policyName, namespaceName, formatStaticHops(bfd, gateways...)) + _, err = framework.RunKubectlInput(namespaceName, data, "apply", "-f", "-") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(func() string { + lastMsg, err := framework.RunKubectl("", "get", "apbexternalroute", policyName, 
"-ojsonpath={.status.messages[-1:]}") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return lastMsg + }, 10).Should(gomega.Equal(fmt.Sprintf("Configured external gateway IPs: %s", strings.Join(gateways, ",")))) + + gomega.Eventually(func() string { + s, err := framework.RunKubectl("", "get", "apbexternalroute", policyName, "-ojsonpath={.status.status}") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return s + }, 10).Should(gomega.Equal("Success")) + gomega.Eventually(func() string { + t, err := framework.RunKubectl("", "get", "apbexternalroute", policyName, "-ojsonpath={.status.lastTransitionTime}") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return t + }, 10, 1).ShouldNot(gomega.Equal(lastUpdatetime)) + +} + +func deleteAPBExternalRouteCR(policyName string) { + framework.RunKubectl("", "delete", "apbexternalroute", policyName) +} +func formatStaticHops(bfd bool, gateways ...string) string { + b := strings.Builder{} + bfdEnabled := "true" + if !bfd { + bfdEnabled = "false" + } + for _, gateway := range gateways { + b.WriteString(fmt.Sprintf(` - ip: "%s" + bfdEnabled: %s +`, gateway, bfdEnabled)) + } + return b.String() +} + +func formatDynamicHops(bfd bool, servingNamespace string) string { + b := strings.Builder{} + bfdEnabled := "true" + if !bfd { + bfdEnabled = "false" + } + b.WriteString(fmt.Sprintf(` - podSelector: + matchLabels: + gatewayPod: "true" + bfdEnabled: %s + namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: %s + networkAttachmentName: foo +`, bfdEnabled, servingNamespace)) + return b.String() +} + +func getGatewayPod(f *framework.Framework, podNamespace, podName string) *v1.Pod { + pod, err := f.ClientSet.CoreV1().Pods(podNamespace).Get(context.Background(), podName, metav1.GetOptions{}) + framework.ExpectNoError(err, fmt.Sprintf("unable to get pod: %s, err: %v", podName, err)) + return pod +} + func hostNamesForContainers(containers []string) map[string]struct{} { res := make(map[string]struct{}) for _, c := range containers { @@ -1341,8 +2782,11 @@ func pokeConntrackEntries(nodeName, podIP, protocol string, patterns []string) i for _, connEntry := range strings.Split(conntrackEntries, "\n") { match := strings.Contains(connEntry, protocol) && strings.Contains(connEntry, podIP) for _, pattern := range patterns { - if strings.Contains(connEntry, pattern) && match { - numOfConnEntries++ + if match { + klog.Infof("%s in %s", pattern, connEntry) + if strings.Contains(connEntry, pattern) { + numOfConnEntries++ + } } } if len(patterns) == 0 && match { @@ -1398,10 +2842,7 @@ EOF func isBFDPaired(container, peer string) bool { res, err := runCommand(containerRuntime, "exec", container, "bash", "-c", fmt.Sprintf("vtysh -c \"show bfd peer %s\"", peer)) framework.ExpectNoError(err, "failed to check bfd status in %s", container) - if strings.Contains(res, "Status: up") { - return true - } - return false + return strings.Contains(res, "Status: up") } // When running on host network we clean the routes and ips we added previously @@ -1444,3 +2885,22 @@ func checkPingOnContainer(container string, srcPodName string, icmpCmd string, w framework.ExpectNoError(err, "Failed to detect icmp messages from %s on gateway %s", srcPodName, container) framework.Logf("ICMP packet successfully detected on gateway %s", container) } + +func resetGatewayAnnotations(f *framework.Framework) { + // remove the routing external annotation + if f == nil || f.Namespace == nil { + return + } + annotations := []string{ + "k8s.ovn.org/routing-external-gws-", + 
"k8s.ovn.org/bfd-enabled-", + } + ginkgo.By("Resetting the gw annotations") + for _, annotation := range annotations { + framework.RunKubectlOrDie("", []string{ + "annotate", + "namespace", + f.Namespace.Name, + annotation}...) + } +} From 9a40b3d3ebab02a55990f3765d1e890484d7e1fd Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Sat, 6 May 2023 21:33:26 +0800 Subject: [PATCH 69/90] Allow external gateway bridge without uplink port In local gateway mode Users can create the external gateway bridge without attaching a host physical interface as the uplink port in local gateway mode. The gateway router will use 169.254.169.4 as the default gateway. Add a new gateway config flag 'allow-no-uplink' to controll if this setup is allowed or not. It's disabled by default. With this setup, egressIP and egressGW can not work. Signed-off-by: Peng Liu --- go-controller/pkg/config/config.go | 7 + go-controller/pkg/config/config_test.go | 5 + go-controller/pkg/node/gateway.go | 9 +- go-controller/pkg/node/gateway_init.go | 34 +- .../pkg/node/gateway_init_linux_test.go | 121 ++++++ go-controller/pkg/node/gateway_shared_intf.go | 402 ++++++++++-------- go-controller/pkg/node/helper_linux.go | 18 +- 7 files changed, 396 insertions(+), 200 deletions(-) diff --git a/go-controller/pkg/config/config.go b/go-controller/pkg/config/config.go index a0aa868eff..ed81bb719a 100644 --- a/go-controller/pkg/config/config.go +++ b/go-controller/pkg/config/config.go @@ -406,6 +406,8 @@ type GatewayConfig struct { SingleNode bool `gcfg:"single-node"` // DisableForwarding (enabled by default) controls if forwarding is allowed on OVNK controlled interfaces DisableForwarding bool `gcfg:"disable-forwarding"` + // AllowNoUplink (disabled by default) controls if the external gateway bridge without an uplink port is allowed in local gateway mode. 
+ AllowNoUplink bool `gcfg:"allow-no-uplink"` } // OvnAuthConfig holds client authentication and location details for @@ -1273,6 +1275,11 @@ var OVNGatewayFlags = []cli.Flag{ "Single node indicates a one node cluster and allows to simplify ovn-kubernetes gateway logic", Destination: &cliConfig.Gateway.SingleNode, }, + &cli.BoolFlag{ + Name: "allow-no-uplink", + Usage: "Allow the external gateway bridge without an uplink port in local gateway mode", + Destination: &cliConfig.Gateway.AllowNoUplink, + }, // Deprecated CLI options &cli.BoolFlag{ Name: "init-gateways", diff --git a/go-controller/pkg/config/config_test.go b/go-controller/pkg/config/config_test.go index 80e7fcad21..1b635cfd23 100644 --- a/go-controller/pkg/config/config_test.go +++ b/go-controller/pkg/config/config_test.go @@ -204,6 +204,7 @@ v6-join-subnet=fd90::/64 router-subnet=10.50.0.0/16 single-node=false disable-forwarding=true +allow-no-uplink=false [hybridoverlay] enabled=true @@ -310,6 +311,7 @@ var _ = Describe("Config Operations", func() { gomega.Expect(Gateway.RouterSubnet).To(gomega.Equal("")) gomega.Expect(Gateway.SingleNode).To(gomega.BeFalse()) gomega.Expect(Gateway.DisableForwarding).To(gomega.BeFalse()) + gomega.Expect(Gateway.AllowNoUplink).To(gomega.BeFalse()) gomega.Expect(OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout).To(gomega.Equal(1)) gomega.Expect(OVNKubernetesFeature.EgressIPNodeHealthCheckPort).To(gomega.Equal(0)) gomega.Expect(OVNKubernetesFeature.EnableMultiNetwork).To(gomega.BeFalse()) @@ -625,6 +627,7 @@ var _ = Describe("Config Operations", func() { gomega.Expect(Gateway.RouterSubnet).To(gomega.Equal("10.50.0.0/16")) gomega.Expect(Gateway.SingleNode).To(gomega.BeFalse()) gomega.Expect(Gateway.DisableForwarding).To(gomega.BeTrue()) + gomega.Expect(Gateway.AllowNoUplink).To(gomega.BeFalse()) gomega.Expect(HybridOverlay.Enabled).To(gomega.BeTrue()) gomega.Expect(OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout).To(gomega.Equal(3)) @@ -713,6 +716,7 @@ var _ = Describe("Config Operations", func() { gomega.Expect(Gateway.RouterSubnet).To(gomega.Equal("10.55.0.0/16")) gomega.Expect(Gateway.SingleNode).To(gomega.BeTrue()) gomega.Expect(Gateway.DisableForwarding).To(gomega.BeTrue()) + gomega.Expect(Gateway.AllowNoUplink).To(gomega.BeTrue()) gomega.Expect(HybridOverlay.Enabled).To(gomega.BeTrue()) gomega.Expect(OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout).To(gomega.Equal(5)) @@ -767,6 +771,7 @@ var _ = Describe("Config Operations", func() { "-gateway-router-subnet=10.55.0.0/16", "-single-node", "-disable-forwarding", + "-allow-no-uplink", "-enable-hybrid-overlay", "-hybrid-overlay-cluster-subnets=11.132.0.0/14/23", "-monitor-all=false", diff --git a/go-controller/pkg/node/gateway.go b/go-controller/pkg/node/gateway.go index cc0184bb75..cc5b5c05a3 100644 --- a/go-controller/pkg/node/gateway.go +++ b/go-controller/pkg/node/gateway.go @@ -431,10 +431,15 @@ func bridgeForInterface(intfName, nodeName, physicalNetworkName string, gwIPs [] // gateway interface is an OVS bridge uplinkName, err := getIntfName(intfName) if err != nil { - return nil, errors.Wrapf(err, "Failed to find intfName for %s", intfName) + if config.Gateway.Mode == config.GatewayModeLocal && config.Gateway.AllowNoUplink { + klog.Infof("Could not find uplink for %s, setup gateway bridge with no uplink port, egress IP and egress GW will not work", intfName) + } else { + return nil, errors.Wrapf(err, "Failed to find intfName for %s", intfName) + } + } else { + res.uplinkName = uplinkName } res.bridgeName = intfName - 
res.uplinkName = uplinkName } var err error // Now, we get IP addresses for the bridge diff --git a/go-controller/pkg/node/gateway_init.go b/go-controller/pkg/node/gateway_init.go index de4fbfa9e6..73f1760504 100644 --- a/go-controller/pkg/node/gateway_init.go +++ b/go-controller/pkg/node/gateway_init.go @@ -1,6 +1,7 @@ package node import ( + "errors" "fmt" "net" "strings" @@ -189,7 +190,32 @@ func getGatewayNextHops() ([]net.IP, string, error) { if needIPv4NextHop || needIPv6NextHop || gatewayIntf == "" { defaultGatewayIntf, defaultGatewayNextHops, err := getDefaultGatewayInterfaceDetails(gatewayIntf, config.IPv4Mode, config.IPv6Mode) if err != nil { - return nil, "", err + if !(errors.As(err, new(*GatewayInterfaceMismatchError)) && config.Gateway.Mode == config.GatewayModeLocal && config.Gateway.AllowNoUplink) { + return nil, "", err + } + } + if gatewayIntf == "" { + if defaultGatewayIntf == "" { + return nil, "", fmt.Errorf("unable to find default gateway and none provided via config") + } + gatewayIntf = defaultGatewayIntf + } else { + if gatewayIntf != defaultGatewayIntf || len(defaultGatewayNextHops) == 0 { + if config.Gateway.Mode == config.GatewayModeLocal && config.Gateway.AllowNoUplink { + // For local gw, if not default gateway is available or the provide gateway interface is not the host gateway interface + // use nexthop masquerade IP as GR default gw to steer traffic to the gateway bridge + if needIPv4NextHop { + nexthop := net.ParseIP(types.V4DummyNextHopMasqueradeIP) + gatewayNextHops = append(gatewayNextHops, nexthop) + needIPv4NextHop = false + } + if needIPv6NextHop { + nexthop := net.ParseIP(types.V6DummyNextHopMasqueradeIP) + gatewayNextHops = append(gatewayNextHops, nexthop) + needIPv6NextHop = false + } + } + } } if needIPv4NextHop || needIPv6NextHop { for _, defaultGatewayNextHop := range defaultGatewayNextHops { @@ -200,12 +226,6 @@ func getGatewayNextHops() ([]net.IP, string, error) { } } } - if gatewayIntf == "" { - if defaultGatewayIntf == "" { - return nil, "", fmt.Errorf("unable to find default gateway and none provided via config") - } - gatewayIntf = defaultGatewayIntf - } } return gatewayNextHops, gatewayIntf, nil } diff --git a/go-controller/pkg/node/gateway_init_linux_test.go b/go-controller/pkg/node/gateway_init_linux_test.go index 3e0ed67a76..b73f83c901 100644 --- a/go-controller/pkg/node/gateway_init_linux_test.go +++ b/go-controller/pkg/node/gateway_init_linux_test.go @@ -6,6 +6,7 @@ package node import ( "bytes" "context" + "errors" "fmt" "net" "runtime" @@ -1750,5 +1751,125 @@ var _ = Describe("Gateway unit tests", func() { Expect(gatewayIntf).To(Equal(ifName)) Expect(gatewayNextHops[0]).To(Equal(gwIPs[0])) }) + + Context("In Local GW mode", func() { + ovntest.OnSupportedPlatformsIt("Finds correct gateway interface and nexthops when dummy gateway bridge is created", func() { + ifName := "enf1f0" + dummyBridgeName := "br-ex" + _, ipnet, err := net.ParseCIDR("0.0.0.0/0") + Expect(err).ToNot(HaveOccurred()) + hostGwIPs := []net.IP{net.ParseIP("10.0.0.11")} + lnk := &linkMock.Link{} + lnkAttr := &netlink.LinkAttrs{ + Name: ifName, + Index: 5, + } + defaultRoute := &netlink.Route{ + Dst: ipnet, + LinkIndex: 5, + Scope: netlink.SCOPE_UNIVERSE, + Gw: hostGwIPs[0], + MTU: config.Default.MTU, + } + lnk.On("Attrs").Return(lnkAttr) + netlinkMock.On("LinkByName", mock.Anything).Return(lnk, nil) + netlinkMock.On("LinkByIndex", mock.Anything).Return(lnk, nil) + netlinkMock.On("RouteListFiltered", mock.Anything, mock.Anything, 
mock.Anything).Return([]netlink.Route{*defaultRoute}, nil) + + fexec := ovntest.NewLooseCompareFakeExec() + fexec.AddFakeCmd(&ovntest.ExpectedCmd{ + Cmd: fmt.Sprintf("ovs-vsctl --timeout=15 port-to-br %s", ifName), + Err: fmt.Errorf(""), + Output: "", + }) + err = util.SetExec(fexec) + Expect(err).NotTo(HaveOccurred()) + + gwIPs := []net.IP{net.ParseIP(types.V4DummyNextHopMasqueradeIP)} + config.Gateway.Interface = dummyBridgeName + config.Gateway.Mode = config.GatewayModeLocal + config.Gateway.AllowNoUplink = true + + gatewayNextHops, gatewayIntf, err := getGatewayNextHops() + Expect(err).NotTo(HaveOccurred()) + Expect(gatewayIntf).To(Equal(dummyBridgeName)) + Expect(gatewayNextHops[0]).To(Equal(gwIPs[0])) + }) + + ovntest.OnSupportedPlatformsIt("Finds correct gateway interface and nexthops when dummy gateway bridge is created and no default route", func() { + ifName := "enf1f0" + dummyBridgeName := "br-ex" + lnk := &linkMock.Link{} + lnkAttr := &netlink.LinkAttrs{ + Name: ifName, + Index: 5, + } + + lnk.On("Attrs").Return(lnkAttr) + netlinkMock.On("LinkByName", mock.Anything).Return(lnk, nil) + netlinkMock.On("LinkByIndex", mock.Anything).Return(lnk, nil) + netlinkMock.On("RouteListFiltered", mock.Anything, mock.Anything, mock.Anything).Return([]netlink.Route{}, nil) + + fexec := ovntest.NewLooseCompareFakeExec() + fexec.AddFakeCmd(&ovntest.ExpectedCmd{ + Cmd: fmt.Sprintf("ovs-vsctl --timeout=15 port-to-br %s", ifName), + Err: fmt.Errorf(""), + Output: "", + }) + err := util.SetExec(fexec) + Expect(err).NotTo(HaveOccurred()) + + gwIPs := []net.IP{net.ParseIP(types.V4DummyNextHopMasqueradeIP)} + config.Gateway.Interface = dummyBridgeName + config.Gateway.Mode = config.GatewayModeLocal + config.Gateway.AllowNoUplink = true + + gatewayNextHops, gatewayIntf, err := getGatewayNextHops() + Expect(err).NotTo(HaveOccurred()) + Expect(gatewayIntf).To(Equal(dummyBridgeName)) + Expect(gatewayNextHops[0]).To(Equal(gwIPs[0])) + }) + + ovntest.OnSupportedPlatformsIt("Returns error when dummy gateway bridge is created without allow-no-uplink flag", func() { + ifName := "enf1f0" + dummyBridgeName := "br-ex" + _, ipnet, err := net.ParseCIDR("0.0.0.0/0") + Expect(err).ToNot(HaveOccurred()) + hostGwIPs := []net.IP{net.ParseIP("10.0.0.11")} + lnk := &linkMock.Link{} + lnkAttr := &netlink.LinkAttrs{ + Name: ifName, + Index: 5, + } + defaultRoute := &netlink.Route{ + Dst: ipnet, + LinkIndex: 5, + Scope: netlink.SCOPE_UNIVERSE, + Gw: hostGwIPs[0], + MTU: config.Default.MTU, + } + lnk.On("Attrs").Return(lnkAttr) + netlinkMock.On("LinkByName", mock.Anything).Return(lnk, nil) + netlinkMock.On("LinkByIndex", mock.Anything).Return(lnk, nil) + netlinkMock.On("RouteListFiltered", mock.Anything, mock.Anything, mock.Anything).Return([]netlink.Route{*defaultRoute}, nil) + + fexec := ovntest.NewLooseCompareFakeExec() + fexec.AddFakeCmd(&ovntest.ExpectedCmd{ + Cmd: fmt.Sprintf("ovs-vsctl --timeout=15 port-to-br %s", ifName), + Err: fmt.Errorf(""), + Output: "", + }) + err = util.SetExec(fexec) + Expect(err).NotTo(HaveOccurred()) + + config.Gateway.Interface = dummyBridgeName + config.Gateway.Mode = config.GatewayModeLocal + + gatewayNextHops, gatewayIntf, err := getGatewayNextHops() + Expect(errors.As(err, new(*GatewayInterfaceMismatchError))).To(BeTrue()) + Expect(gatewayIntf).To(Equal("")) + Expect(len(gatewayNextHops)).To(Equal(0)) + }) + }) }) }) diff --git a/go-controller/pkg/node/gateway_shared_intf.go b/go-controller/pkg/node/gateway_shared_intf.go index c2aaea2fed..692b4adbc5 100644 --- 
a/go-controller/pkg/node/gateway_shared_intf.go +++ b/go-controller/pkg/node/gateway_shared_intf.go @@ -1087,20 +1087,21 @@ func flowsForDefaultBridge(bridge *bridgeConfiguration, extraIPs []net.IP) ([]st if config.IPv4Mode { // table0, Geneve packets coming from external. Skip conntrack and go directly to host // if dest mac is the shared mac send directly to host. - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=205, in_port=%s, dl_dst=%s, udp, udp_dst=%d, "+ - "actions=output:%s", defaultOpenFlowCookie, ofPortPhys, bridgeMacAddress, config.Default.EncapPort, - ofPortHost)) - // perform NORMAL action otherwise. - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=200, in_port=%s, udp, udp_dst=%d, "+ - "actions=NORMAL", defaultOpenFlowCookie, ofPortPhys, config.Default.EncapPort)) - - // table0, Geneve packets coming from LOCAL. Skip conntrack and go directly to external - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=200, in_port=%s, udp, udp_dst=%d, "+ - "actions=output:%s", defaultOpenFlowCookie, ovsLocalPort, config.Default.EncapPort, ofPortPhys)) + if ofPortPhys != "" { + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=205, in_port=%s, dl_dst=%s, udp, udp_dst=%d, "+ + "actions=output:%s", defaultOpenFlowCookie, ofPortPhys, bridgeMacAddress, config.Default.EncapPort, + ofPortHost)) + // perform NORMAL action otherwise. + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=200, in_port=%s, udp, udp_dst=%d, "+ + "actions=NORMAL", defaultOpenFlowCookie, ofPortPhys, config.Default.EncapPort)) + // table0, Geneve packets coming from LOCAL. Skip conntrack and go directly to external + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=200, in_port=%s, udp, udp_dst=%d, "+ + "actions=output:%s", defaultOpenFlowCookie, ovsLocalPort, config.Default.EncapPort, ofPortPhys)) + } physicalIP, err := util.MatchFirstIPNetFamily(false, bridgeIPs) if err != nil { return nil, fmt.Errorf("unable to determine IPv4 physical IP of host: %v", err) @@ -1141,21 +1142,23 @@ func flowsForDefaultBridge(bridge *bridgeConfiguration, extraIPs []net.IP) ([]st defaultOpenFlowCookie, ofPortHost, types.V4OVNMasqueradeIP, OVNMasqCTZone)) } if config.IPv6Mode { - // table0, Geneve packets coming from external. Skip conntrack and go directly to host - // if dest mac is the shared mac send directly to host. - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=205, in_port=%s, dl_dst=%s, udp6, udp_dst=%d, "+ - "actions=output:%s", defaultOpenFlowCookie, ofPortPhys, bridgeMacAddress, config.Default.EncapPort, - ofPortHost)) - // perform NORMAL action otherwise. - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=200, in_port=%s, udp6, udp_dst=%d, "+ - "actions=NORMAL", defaultOpenFlowCookie, ofPortPhys, config.Default.EncapPort)) + if ofPortPhys != "" { + // table0, Geneve packets coming from external. Skip conntrack and go directly to host + // if dest mac is the shared mac send directly to host. + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=205, in_port=%s, dl_dst=%s, udp6, udp_dst=%d, "+ + "actions=output:%s", defaultOpenFlowCookie, ofPortPhys, bridgeMacAddress, config.Default.EncapPort, + ofPortHost)) + // perform NORMAL action otherwise. + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=200, in_port=%s, udp6, udp_dst=%d, "+ + "actions=NORMAL", defaultOpenFlowCookie, ofPortPhys, config.Default.EncapPort)) - // table0, Geneve packets coming from LOCAL. 
Skip conntrack and send to external - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=200, in_port=%s, udp6, udp_dst=%d, "+ - "actions=output:%s", defaultOpenFlowCookie, ovsLocalPort, config.Default.EncapPort, ofPortPhys)) + // table0, Geneve packets coming from LOCAL. Skip conntrack and send to external + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=200, in_port=%s, udp6, udp_dst=%d, "+ + "actions=output:%s", defaultOpenFlowCookie, ovsLocalPort, config.Default.EncapPort, ofPortPhys)) + } physicalIP, err := util.MatchFirstIPNetFamily(true, bridgeIPs) if err != nil { @@ -1233,59 +1236,61 @@ func flowsForDefaultBridge(bridge *bridgeConfiguration, extraIPs []net.IP) ([]st actions := fmt.Sprintf("output:%s", ofPortPatch) - if config.IPv4Mode { - // table 1, established and related connections in zone 64000 with ct_mark ctMarkOVN go to OVN - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=100, table=1, ip, ct_state=+trk+est, ct_mark=%s, "+ - "actions=%s", - defaultOpenFlowCookie, ctMarkOVN, actions)) + if ofPortPhys != "" { + if config.IPv4Mode { + // table 1, established and related connections in zone 64000 with ct_mark ctMarkOVN go to OVN + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=100, table=1, ip, ct_state=+trk+est, ct_mark=%s, "+ + "actions=%s", + defaultOpenFlowCookie, ctMarkOVN, actions)) - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=100, table=1, ip, ct_state=+trk+rel, ct_mark=%s, "+ - "actions=%s", - defaultOpenFlowCookie, ctMarkOVN, actions)) + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=100, table=1, ip, ct_state=+trk+rel, ct_mark=%s, "+ + "actions=%s", + defaultOpenFlowCookie, ctMarkOVN, actions)) - // table 1, established and related connections in zone 64000 with ct_mark ctMarkHost go to host - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=100, table=1, ip, ct_state=+trk+est, ct_mark=%s, "+ - "actions=output:%s", - defaultOpenFlowCookie, ctMarkHost, ofPortHost)) + // table 1, established and related connections in zone 64000 with ct_mark ctMarkHost go to host + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=100, table=1, ip, ct_state=+trk+est, ct_mark=%s, "+ + "actions=output:%s", + defaultOpenFlowCookie, ctMarkHost, ofPortHost)) - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=100, table=1, ip, ct_state=+trk+rel, ct_mark=%s, "+ - "actions=output:%s", - defaultOpenFlowCookie, ctMarkHost, ofPortHost)) - } + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=100, table=1, ip, ct_state=+trk+rel, ct_mark=%s, "+ + "actions=output:%s", + defaultOpenFlowCookie, ctMarkHost, ofPortHost)) + } - if config.IPv6Mode { - // table 1, established and related connections in zone 64000 with ct_mark ctMarkOVN go to OVN - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=100, table=1, ipv6, ct_state=+trk+est, ct_mark=%s, "+ - "actions=%s", - defaultOpenFlowCookie, ctMarkOVN, actions)) + if config.IPv6Mode { + // table 1, established and related connections in zone 64000 with ct_mark ctMarkOVN go to OVN + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=100, table=1, ipv6, ct_state=+trk+est, ct_mark=%s, "+ + "actions=%s", + defaultOpenFlowCookie, ctMarkOVN, actions)) - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=100, table=1, ipv6, ct_state=+trk+rel, ct_mark=%s, "+ - "actions=%s", - defaultOpenFlowCookie, ctMarkOVN, actions)) + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, 
priority=100, table=1, ipv6, ct_state=+trk+rel, ct_mark=%s, "+ + "actions=%s", + defaultOpenFlowCookie, ctMarkOVN, actions)) - // table 1, established and related connections in zone 64000 with ct_mark ctMarkHost go to host - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=100, table=1, ip6, ct_state=+trk+est, ct_mark=%s, "+ - "actions=output:%s", - defaultOpenFlowCookie, ctMarkHost, ofPortHost)) + // table 1, established and related connections in zone 64000 with ct_mark ctMarkHost go to host + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=100, table=1, ip6, ct_state=+trk+est, ct_mark=%s, "+ + "actions=output:%s", + defaultOpenFlowCookie, ctMarkHost, ofPortHost)) + + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=100, table=1, ip6, ct_state=+trk+rel, ct_mark=%s, "+ + "actions=output:%s", + defaultOpenFlowCookie, ctMarkHost, ofPortHost)) + } + // table 1, we check to see if this dest mac is the shared mac, if so send to host dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=100, table=1, ip6, ct_state=+trk+rel, ct_mark=%s, "+ - "actions=output:%s", - defaultOpenFlowCookie, ctMarkHost, ofPortHost)) + fmt.Sprintf("cookie=%s, priority=10, table=1, dl_dst=%s, actions=output:%s", + defaultOpenFlowCookie, bridgeMacAddress, ofPortHost)) } - // table 1, we check to see if this dest mac is the shared mac, if so send to host - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=10, table=1, dl_dst=%s, actions=output:%s", - defaultOpenFlowCookie, bridgeMacAddress, ofPortHost)) - // table 2, dispatch from Host -> OVN dftFlows = append(dftFlows, fmt.Sprintf("cookie=%s, table=2, "+ @@ -1336,40 +1341,43 @@ func commonFlows(subnets []*net.IPNet, bridge *bridgeConfiguration) ([]string, e var dftFlows []string - // table 0, we check to see if this dest mac is the shared mac, if so flood to both ports - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=10, table=0, in_port=%s, dl_dst=%s, actions=output:%s,output:%s", - defaultOpenFlowCookie, ofPortPhys, bridgeMacAddress, ofPortPatch, ofPortHost)) + if ofPortPhys != "" { + // table 0, we check to see if this dest mac is the shared mac, if so flood to both ports + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=10, table=0, in_port=%s, dl_dst=%s, actions=output:%s,output:%s", + defaultOpenFlowCookie, ofPortPhys, bridgeMacAddress, ofPortPatch, ofPortHost)) + } if config.IPv4Mode { physicalIP, err := util.MatchFirstIPNetFamily(false, bridgeIPs) if err != nil { return nil, fmt.Errorf("unable to determine IPv4 physical IP of host: %v", err) } - // table0, packets coming from egressIP pods that have mark 1008 on them - // will be DNAT-ed a final time into nodeIP to maintain consistency in traffic even if the GR - // DNATs these into egressIP prior to reaching external bridge. - // egressService pods will also undergo this SNAT to nodeIP since these features are tied - // together at the OVN policy level on the distributed router. - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=105, in_port=%s, ip, pkt_mark=%s "+ - "actions=ct(commit, zone=%d, nat(src=%s), exec(set_field:%s->ct_mark)),output:%s", - defaultOpenFlowCookie, ofPortPatch, ovnKubeNodeSNATMark, config.Default.ConntrackZone, physicalIP.IP, ctMarkOVN, ofPortPhys)) - - // table 0, packets coming from pods headed externally. Commit connections with ct_mark ctMarkOVN - // so that reverse direction goes back to the pods. 
- dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=100, in_port=%s, ip, "+ - "actions=ct(commit, zone=%d, exec(set_field:%s->ct_mark)), output:%s", - defaultOpenFlowCookie, ofPortPatch, config.Default.ConntrackZone, ctMarkOVN, ofPortPhys)) + if ofPortPhys != "" { + // table0, packets coming from egressIP pods that have mark 1008 on them + // will be DNAT-ed a final time into nodeIP to maintain consistency in traffic even if the GR + // DNATs these into egressIP prior to reaching external bridge. + // egressService pods will also undergo this SNAT to nodeIP since these features are tied + // together at the OVN policy level on the distributed router. + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=105, in_port=%s, ip, pkt_mark=%s "+ + "actions=ct(commit, zone=%d, nat(src=%s), exec(set_field:%s->ct_mark)),output:%s", + defaultOpenFlowCookie, ofPortPatch, ovnKubeNodeSNATMark, config.Default.ConntrackZone, physicalIP.IP, ctMarkOVN, ofPortPhys)) - // table 0, packets coming from host Commit connections with ct_mark ctMarkHost - // so that reverse direction goes back to the host. - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=100, in_port=%s, ip, "+ - "actions=ct(commit, zone=%d, exec(set_field:%s->ct_mark)), output:%s", - defaultOpenFlowCookie, ofPortHost, config.Default.ConntrackZone, ctMarkHost, ofPortPhys)) + // table 0, packets coming from pods headed externally. Commit connections with ct_mark ctMarkOVN + // so that reverse direction goes back to the pods. + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=100, in_port=%s, ip, "+ + "actions=ct(commit, zone=%d, exec(set_field:%s->ct_mark)), output:%s", + defaultOpenFlowCookie, ofPortPatch, config.Default.ConntrackZone, ctMarkOVN, ofPortPhys)) + // table 0, packets coming from host Commit connections with ct_mark ctMarkHost + // so that reverse direction goes back to the host. + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=100, in_port=%s, ip, "+ + "actions=ct(commit, zone=%d, exec(set_field:%s->ct_mark)), output:%s", + defaultOpenFlowCookie, ofPortHost, config.Default.ConntrackZone, ctMarkHost, ofPortPhys)) + } if config.Gateway.Mode == config.GatewayModeLocal { // table 0, any packet coming from OVN send to host in LGW mode, host will take care of sending it outside if needed. // exceptions are traffic for egressIP and egressGW features and ICMP related traffic which will hit the priority 100 flow instead of this. @@ -1386,46 +1394,51 @@ func commonFlows(subnets []*net.IPNet, bridge *bridgeConfiguration) ([]string, e "actions=ct(table=4,zone=%d)", defaultOpenFlowCookie, ofPortPatch, physicalIP.IP, HostMasqCTZone)) // We send BFD traffic coming from OVN to outside directly using a higher priority flow - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=650, table=0, in_port=%s, udp, tp_dst=3784, actions=output:%s", - defaultOpenFlowCookie, ofPortPatch, ofPortPhys)) + if ofPortPhys != "" { + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=650, table=0, in_port=%s, udp, tp_dst=3784, actions=output:%s", + defaultOpenFlowCookie, ofPortPatch, ofPortPhys)) + } } - // table 0, packets coming from external. Send it through conntrack and - // resubmit to table 1 to know the state and mark of the connection. 
- dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=50, in_port=%s, ip, "+ - "actions=ct(zone=%d, nat, table=1)", defaultOpenFlowCookie, ofPortPhys, config.Default.ConntrackZone)) + if ofPortPhys != "" { + // table 0, packets coming from external. Send it through conntrack and + // resubmit to table 1 to know the state and mark of the connection. + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=50, in_port=%s, ip, "+ + "actions=ct(zone=%d, nat, table=1)", defaultOpenFlowCookie, ofPortPhys, config.Default.ConntrackZone)) + } } if config.IPv6Mode { physicalIP, err := util.MatchFirstIPNetFamily(true, bridgeIPs) if err != nil { return nil, fmt.Errorf("unable to determine IPv6 physical IP of host: %v", err) } - // table0, packets coming from egressIP pods that have mark 1008 on them - // will be DNAT-ed a final time into nodeIP to maintain consistency in traffic even if the GR - // DNATs these into egressIP prior to reaching external bridge. - // egressService pods will also undergo this SNAT to nodeIP since these features are tied - // together at the OVN policy level on the distributed router. - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=105, in_port=%s, ipv6, pkt_mark=%s "+ - "actions=ct(commit, zone=%d, nat(src=%s), exec(set_field:%s->ct_mark)),output:%s", - defaultOpenFlowCookie, ofPortPatch, ovnKubeNodeSNATMark, config.Default.ConntrackZone, physicalIP.IP, ctMarkOVN, ofPortPhys)) - - // table 0, packets coming from pods headed externally. Commit connections with ct_mark ctMarkOVN - // so that reverse direction goes back to the pods. - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=100, in_port=%s, ipv6, "+ - "actions=ct(commit, zone=%d, exec(set_field:%s->ct_mark)), output:%s", - defaultOpenFlowCookie, ofPortPatch, config.Default.ConntrackZone, ctMarkOVN, ofPortPhys)) + if ofPortPhys != "" { + // table0, packets coming from egressIP pods that have mark 1008 on them + // will be DNAT-ed a final time into nodeIP to maintain consistency in traffic even if the GR + // DNATs these into egressIP prior to reaching external bridge. + // egressService pods will also undergo this SNAT to nodeIP since these features are tied + // together at the OVN policy level on the distributed router. + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=105, in_port=%s, ipv6, pkt_mark=%s "+ + "actions=ct(commit, zone=%d, nat(src=%s), exec(set_field:%s->ct_mark)),output:%s", + defaultOpenFlowCookie, ofPortPatch, ovnKubeNodeSNATMark, config.Default.ConntrackZone, physicalIP.IP, ctMarkOVN, ofPortPhys)) - // table 0, packets coming from host. Commit connections with ct_mark ctMarkHost - // so that reverse direction goes back to the host. - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=100, in_port=%s, ipv6, "+ - "actions=ct(commit, zone=%d, exec(set_field:%s->ct_mark)), output:%s", - defaultOpenFlowCookie, ofPortHost, config.Default.ConntrackZone, ctMarkHost, ofPortPhys)) + // table 0, packets coming from pods headed externally. Commit connections with ct_mark ctMarkOVN + // so that reverse direction goes back to the pods. + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=100, in_port=%s, ipv6, "+ + "actions=ct(commit, zone=%d, exec(set_field:%s->ct_mark)), output:%s", + defaultOpenFlowCookie, ofPortPatch, config.Default.ConntrackZone, ctMarkOVN, ofPortPhys)) + // table 0, packets coming from host. Commit connections with ct_mark ctMarkHost + // so that reverse direction goes back to the host. 
+ dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=100, in_port=%s, ipv6, "+ + "actions=ct(commit, zone=%d, exec(set_field:%s->ct_mark)), output:%s", + defaultOpenFlowCookie, ofPortHost, config.Default.ConntrackZone, ctMarkHost, ofPortPhys)) + } if config.Gateway.Mode == config.GatewayModeLocal { // table 0, any packet coming from OVN send to host in LGW mode, host will take care of sending it outside if needed. // exceptions are traffic for egressIP and egressGW features and ICMP related traffic which will hit the priority 100 flow instead of this. @@ -1441,18 +1454,21 @@ func commonFlows(subnets []*net.IPNet, bridge *bridgeConfiguration) ([]string, e fmt.Sprintf("cookie=%s, priority=175, in_port=%s, sctp6, ipv6_src=%s, "+ "actions=ct(table=4,zone=%d)", defaultOpenFlowCookie, ofPortPatch, physicalIP.IP, HostMasqCTZone)) - // We send BFD traffic coming from OVN to outside directly using a higher priority flow + if ofPortPhys != "" { + // We send BFD traffic coming from OVN to outside directly using a higher priority flow + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=650, table=0, in_port=%s, udp6, tp_dst=3784, actions=output:%s", + defaultOpenFlowCookie, ofPortPatch, ofPortPhys)) + } + } + if ofPortPhys != "" { + // table 0, packets coming from external. Send it through conntrack and + // resubmit to table 1 to know the state and mark of the connection. dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=650, table=0, in_port=%s, udp6, tp_dst=3784, actions=output:%s", - defaultOpenFlowCookie, ofPortPatch, ofPortPhys)) + fmt.Sprintf("cookie=%s, priority=50, in_port=%s, ipv6, "+ + "actions=ct(zone=%d, nat, table=1)", defaultOpenFlowCookie, ofPortPhys, config.Default.ConntrackZone)) } - // table 0, packets coming from external. Send it through conntrack and - // resubmit to table 1 to know the state and mark of the connection. - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=50, in_port=%s, ipv6, "+ - "actions=ct(zone=%d, nat, table=1)", defaultOpenFlowCookie, ofPortPhys, config.Default.ConntrackZone)) } - // Egress IP is often configured on a node different from the one hosting the affected pod. // Due to the fact that ovn-controllers on different nodes apply the changes independently, // there is a chance that the pod traffic will reach the egress node before it configures the SNAT flows. @@ -1474,65 +1490,71 @@ func commonFlows(subnets []*net.IPNet, bridge *bridgeConfiguration) ([]string, e if utilnet.IsIPv6CIDR(subnet) { ipPrefix = "ipv6" } - // table 0, commit connections from local pods. - // ICNIv2 requires that local pod traffic can leave the node without SNAT. - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=109, in_port=%s, %s, %s_src=%s"+ - "actions=ct(commit, zone=%d, exec(set_field:%s->ct_mark)), output:%s", - defaultOpenFlowCookie, ofPortPatch, ipPrefix, ipPrefix, subnet, config.Default.ConntrackZone, ctMarkOVN, ofPortPhys)) - } - } - - actions := fmt.Sprintf("output:%s", ofPortPatch) - - if config.Gateway.DisableSNATMultipleGWs { - // table 1, traffic to pod subnet go directly to OVN - for _, clusterEntry := range config.Default.ClusterSubnets { - cidr := clusterEntry.CIDR - var ipPrefix string - if utilnet.IsIPv6CIDR(cidr) { - ipPrefix = "ipv6" - } else { - ipPrefix = "ip" + if ofPortPhys != "" { + // table 0, commit connections from local pods. + // ICNIv2 requires that local pod traffic can leave the node without SNAT. 
+ dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=109, in_port=%s, %s, %s_src=%s"+ + "actions=ct(commit, zone=%d, exec(set_field:%s->ct_mark)), output:%s", + defaultOpenFlowCookie, ofPortPatch, ipPrefix, ipPrefix, subnet, config.Default.ConntrackZone, ctMarkOVN, ofPortPhys)) } - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=15, table=1, %s, %s_dst=%s, "+ - "actions=%s", - defaultOpenFlowCookie, ipPrefix, ipPrefix, cidr, actions)) } } - // table 1, we check to see if this dest mac is the shared mac, if so send to host - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=10, table=1, dl_dst=%s, actions=output:%s", - defaultOpenFlowCookie, bridgeMacAddress, ofPortHost)) + if ofPortPhys != "" { + actions := fmt.Sprintf("output:%s", ofPortPatch) - if config.IPv6Mode { - // REMOVEME(trozet) when https://bugzilla.kernel.org/show_bug.cgi?id=11797 is resolved - // must flood icmpv6 Route Advertisement and Neighbor Advertisement traffic as it fails to create a CT entry - for _, icmpType := range []int{types.RouteAdvertisementICMPType, types.NeighborAdvertisementICMPType} { - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=14, table=1,icmp6,icmpv6_type=%d actions=FLOOD", - defaultOpenFlowCookie, icmpType)) + if config.Gateway.DisableSNATMultipleGWs { + // table 1, traffic to pod subnet go directly to OVN + for _, clusterEntry := range config.Default.ClusterSubnets { + cidr := clusterEntry.CIDR + var ipPrefix string + if utilnet.IsIPv6CIDR(cidr) { + ipPrefix = "ipv6" + } else { + ipPrefix = "ip" + } + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=15, table=1, %s, %s_dst=%s, "+ + "actions=%s", + defaultOpenFlowCookie, ipPrefix, ipPrefix, cidr, actions)) + } } - // We send BFD traffic both on the host and in ovn + // table 1, we check to see if this dest mac is the shared mac, if so send to host dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=13, table=1, in_port=%s, udp6, tp_dst=3784, actions=output:%s,output:%s", - defaultOpenFlowCookie, ofPortPhys, ofPortPatch, ofPortHost)) - } + fmt.Sprintf("cookie=%s, priority=10, table=1, dl_dst=%s, actions=output:%s", + defaultOpenFlowCookie, bridgeMacAddress, ofPortHost)) + + if config.IPv6Mode { + // REMOVEME(trozet) when https://bugzilla.kernel.org/show_bug.cgi?id=11797 is resolved + // must flood icmpv6 Route Advertisement and Neighbor Advertisement traffic as it fails to create a CT entry + for _, icmpType := range []int{types.RouteAdvertisementICMPType, types.NeighborAdvertisementICMPType} { + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=14, table=1,icmp6,icmpv6_type=%d actions=FLOOD", + defaultOpenFlowCookie, icmpType)) + } + if ofPortPhys != "" { + // We send BFD traffic both on the host and in ovn + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=13, table=1, in_port=%s, udp6, tp_dst=3784, actions=output:%s,output:%s", + defaultOpenFlowCookie, ofPortPhys, ofPortPatch, ofPortHost)) + } + } - if config.IPv4Mode { - // We send BFD traffic both on the host and in ovn + if config.IPv4Mode { + if ofPortPhys != "" { + // We send BFD traffic both on the host and in ovn + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=13, table=1, in_port=%s, udp, tp_dst=3784, actions=output:%s,output:%s", + defaultOpenFlowCookie, ofPortPhys, ofPortPatch, ofPortHost)) + } + } + // table 1, all other connections do normal processing dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=13, table=1, in_port=%s, udp, tp_dst=3784, 
actions=output:%s,output:%s", - defaultOpenFlowCookie, ofPortPhys, ofPortPatch, ofPortHost)) + fmt.Sprintf("cookie=%s, priority=0, table=1, actions=output:NORMAL", defaultOpenFlowCookie)) } - // table 1, all other connections do normal processing - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=0, table=1, actions=output:NORMAL", defaultOpenFlowCookie)) - return dftFlows, nil } @@ -1543,15 +1565,17 @@ func setBridgeOfPorts(bridge *bridgeConfiguration) error { return fmt.Errorf("failed while waiting on patch port %q to be created by ovn-controller and "+ "while getting ofport. stderr: %q, error: %v", bridge.patchPort, stderr, err) } + bridge.ofPortPatch = ofportPatch - // Get ofport of physical interface - ofportPhys, stderr, err := util.GetOVSOfPort("get", "interface", bridge.uplinkName, "ofport") - if err != nil { - return fmt.Errorf("failed to get ofport of %s, stderr: %q, error: %v", - bridge.uplinkName, stderr, err) + if bridge.uplinkName != "" { + // Get ofport of physical interface + ofportPhys, stderr, err := util.GetOVSOfPort("get", "interface", bridge.uplinkName, "ofport") + if err != nil { + return fmt.Errorf("failed to get ofport of %s, stderr: %q, error: %v", + bridge.uplinkName, stderr, err) + } + bridge.ofPortPhys = ofportPhys } - bridge.ofPortPatch = ofportPatch - bridge.ofPortPhys = ofportPhys // Get ofport represeting the host. That is, host representor port in case of DPUs, ovsLocalPort otherwise. if config.OvnKubeNode.Mode == types.NodeModeDPU { diff --git a/go-controller/pkg/node/helper_linux.go b/go-controller/pkg/node/helper_linux.go index ea86b10814..79458eeb0b 100644 --- a/go-controller/pkg/node/helper_linux.go +++ b/go-controller/pkg/node/helper_linux.go @@ -13,6 +13,20 @@ import ( "k8s.io/klog/v2" ) +type GatewayInterfaceMismatchError struct { + msg string +} + +func (error *GatewayInterfaceMismatchError) Error() string { + return error.msg +} + +func newGatewayInterfaceMismatchError(format string, args ...interface{}) *GatewayInterfaceMismatchError { + return &GatewayInterfaceMismatchError{ + msg: fmt.Sprintf(format, args...), + } +} + // getDefaultGatewayInterfaceDetails returns the interface name on // which the default gateway (for route to 0.0.0.0) is configured. 
// optionally pass the pre-determined gateway interface @@ -103,7 +117,7 @@ func getDefaultGatewayInterfaceByFamily(family int, gwIface string) (string, net klog.Infof("Found default gateway interface %s %s", foundIfName, r.Gw.String()) if len(gwIface) > 0 && gwIface != foundIfName { // this should not happen, but if it did, indicates something broken with our use of the netlink lib - return "", nil, fmt.Errorf("mistmaching provided gw interface: %s and gateway found: %s", + return "", nil, newGatewayInterfaceMismatchError("mismatching provided gw interface: %s and gateway found: %s", gwIface, foundIfName) } return foundIfName, r.Gw, nil @@ -129,7 +143,7 @@ func getDefaultGatewayInterfaceByFamily(family int, gwIface string) (string, net klog.Infof("Found default gateway interface %s %s", foundIfName, nh.Gw.String()) if len(gwIface) > 0 && gwIface != foundIfName { // this should not happen, but if it did, indicates something broken with our use of the netlink lib - return "", nil, fmt.Errorf("mistmaching provided gw interface: %q and gateway found: %q", + return "", nil, newGatewayInterfaceMismatchError("mismatching provided gw interface: %q and gateway found: %q", gwIface, foundIfName) } return foundIfName, nh.Gw, nil From 45d8aecd584f52cf979c30e812c702c30005d692 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Wed, 24 May 2023 10:45:59 +0800 Subject: [PATCH 70/90] Use no-uplink gateway bridge in compact-mode e2e test Signed-off-by: Peng Liu --- .github/workflows/test.yml | 1 + contrib/kind.sh | 10 +++++++++- dist/images/daemonset.sh | 11 +++++++++++ dist/images/ovnkube.sh | 2 +- dist/templates/ovnkube-master.yaml.j2 | 14 ++++++++++++++ dist/templates/ovnkube-node.yaml.j2 | 13 ++++++++++++- 6 files changed, 48 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ba83051518..5b5c877c8b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -399,6 +399,7 @@ jobs: ENABLE_MULTI_NET: "${{ matrix.target == 'multi-homing' }}" OVN_SEPARATE_CLUSTER_MANAGER: "${{ matrix.separate-cluster-manager == 'true' }}" OVN_COMPACT_MODE: "${{ matrix.target == 'compact-mode' }}" + OVN_DUMMY_GATEWAY_BRIDGE: "${{ matrix.target == 'compact-mode' }}" steps: - name: Free up disk space diff --git a/contrib/kind.sh b/contrib/kind.sh index 9a9060a5bf..e451cc6d2f 100755 --- a/contrib/kind.sh +++ b/contrib/kind.sh @@ -249,6 +249,8 @@ parse_args() { fi OVN_GATEWAY_MODE=$1 ;; + -dgb | --dummy-gateway-bridge) OVN_DUMMY_GATEWAY_BRIDGE=true + ;; -ov | --ovn-image ) shift OVN_IMAGE=$1 ;; @@ -361,6 +363,7 @@ print_params() { echo "KIND_ALLOW_SYSTEM_WRITES = $KIND_ALLOW_SYSTEM_WRITES" echo "KIND_EXPERIMENTAL_PROVIDER = $KIND_EXPERIMENTAL_PROVIDER" echo "OVN_GATEWAY_MODE = $OVN_GATEWAY_MODE" + echo "OVN_DUMMY_GATEWAY_BRIDGE = $OVN_DUMMY_GATEWAY_BRIDGE" echo "OVN_HYBRID_OVERLAY_ENABLE = $OVN_HYBRID_OVERLAY_ENABLE" echo "OVN_DISABLE_SNAT_MULTIPLE_GWS = $OVN_DISABLE_SNAT_MULTIPLE_GWS" echo "OVN_DISABLE_FORWARDING = $OVN_DISABLE_FORWARDING" @@ -543,10 +546,14 @@ set_default_params() { OVN_DEPLOY_PODS=${OVN_DEPLOY_PODS:-"ovnkube-master ovnkube-node"} OVN_METRICS_SCALE_ENABLE=${OVN_METRICS_SCALE_ENABLE:-false} OVN_ISOLATED=${OVN_ISOLATED:-false} - OVN_GATEWAY_OPTS="" + OVN_GATEWAY_OPTS=${OVN_GATEWAY_OPTS:-""} if [ "$OVN_ISOLATED" == true ]; then OVN_GATEWAY_OPTS="--gateway-interface=eth0" fi + OVN_DUMMY_GATEWAY_BRIDGE=${OVN_DUMMY_GATEWAY_BRIDGE:-false} + if [ "$OVN_DUMMY_GATEWAY_BRIDGE" == true ]; then + OVN_GATEWAY_OPTS="--allow-no-uplink --gateway-interface=br-ex" + 
fi ENABLE_MULTI_NET=${ENABLE_MULTI_NET:-false} OVN_SEPARATE_CLUSTER_MANAGER=${OVN_SEPARATE_CLUSTER_MANAGER:-false} OVN_COMPACT_MODE=${OVN_COMPACT_MODE:-false} @@ -780,6 +787,7 @@ create_ovn_kube_manifests() { --net-cidr="${NET_CIDR}" \ --svc-cidr="${SVC_CIDR}" \ --gateway-mode="${OVN_GATEWAY_MODE}" \ + --dummy-gateway-bridge="${OVN_DUMMY_GATEWAY_BRIDGE}" \ --gateway-options="${OVN_GATEWAY_OPTS}" \ --enable-ipsec="${ENABLE_IPSEC}" \ --hybrid-enabled="${OVN_HYBRID_OVERLAY_ENABLE}" \ diff --git a/dist/images/daemonset.sh b/dist/images/daemonset.sh index 5711ee3017..b5865a7e93 100755 --- a/dist/images/daemonset.sh +++ b/dist/images/daemonset.sh @@ -31,6 +31,7 @@ OVN_SVC_CIDR="" OVN_K8S_APISERVER="" OVN_GATEWAY_MODE="" OVN_GATEWAY_OPTS="" +OVN_DUMMY_GATEWAY_BRIDGE="" OVN_DB_REPLICAS="" OVN_MTU="" OVN_SSL_ENABLE="" @@ -108,6 +109,9 @@ while [ "$1" != "" ]; do --gateway-options) OVN_GATEWAY_OPTS=$VALUE ;; + --dummy-gateway-bridge) + OVN_DUMMY_GATEWAY_BRIDGE=$VALUE + ;; --enable-ipsec) ENABLE_IPSEC=$VALUE ;; @@ -328,6 +332,9 @@ echo "ovn_gateway_mode: ${ovn_gateway_mode}" ovn_gateway_opts=${OVN_GATEWAY_OPTS} echo "ovn_gateway_opts: ${ovn_gateway_opts}" +ovn_dummy_gateway_bridge=${OVN_DUMMY_GATEWAY_BRIDGE} +echo "ovn_dummy_gateway_bridge: ${ovn_dummy_gateway_bridge}" + enable_ipsec=${ENABLE_IPSEC:-false} echo "enable_ipsec: ${enable_ipsec}" @@ -450,6 +457,7 @@ ovn_image=${ovnkube_image} \ ovn_unprivileged_mode=${ovn_unprivileged_mode} \ ovn_gateway_mode=${ovn_gateway_mode} \ ovn_gateway_opts=${ovn_gateway_opts} \ + ovn_dummy_gateway_bridge=${ovn_dummy_gateway_bridge} \ ovnkube_node_loglevel=${node_loglevel} \ ovn_loglevel_controller=${ovn_loglevel_controller} \ ovnkube_logfile_maxsize=${ovnkube_logfile_maxsize} \ @@ -495,6 +503,7 @@ ovn_image=${image} \ ovn_unprivileged_mode=${ovn_unprivileged_mode} \ ovn_gateway_mode=${ovn_gateway_mode} \ ovn_gateway_opts=${ovn_gateway_opts} \ + ovn_dummy_gateway_bridge=${ovn_dummy_gateway_bridge} \ ovnkube_node_loglevel=${node_loglevel} \ ovn_loglevel_controller=${ovn_loglevel_controller} \ ovnkube_logfile_maxsize=${ovnkube_logfile_maxsize} \ @@ -550,6 +559,8 @@ ovn_image=${ovnkube_image} \ ovn_ssl_en=${ovn_ssl_en} \ ovn_master_count=${ovn_master_count} \ ovn_gateway_mode=${ovn_gateway_mode} \ + ovn_gateway_opts=${ovn_gateway_opts} \ + ovn_dummy_gateway_bridge=${ovn_dummy_gateway_bridge} \ ovn_ex_gw_networking_interface=${ovn_ex_gw_networking_interface} \ ovn_stateless_netpol_enable=${ovn_netpol_acl_enable} \ ovnkube_compact_mode_enable=${ovnkube_compact_mode_enable} \ diff --git a/dist/images/ovnkube.sh b/dist/images/ovnkube.sh index e56d4dbf88..a3c6203aa0 100755 --- a/dist/images/ovnkube.sh +++ b/dist/images/ovnkube.sh @@ -1047,7 +1047,7 @@ ovn-master() { ${init_node_flags} \ --cluster-subnets ${net_cidr} --k8s-service-cidr=${svc_cidr} \ --nb-address=${ovn_nbdb} --sb-address=${ovn_sbdb} \ - --gateway-mode=${ovn_gateway_mode} \ + --gateway-mode=${ovn_gateway_mode} ${ovn_gateway_opts} \ --loglevel=${ovnkube_loglevel} \ --logfile-maxsize=${ovnkube_logfile_maxsize} \ --logfile-maxbackups=${ovnkube_logfile_maxbackups} \ diff --git a/dist/templates/ovnkube-master.yaml.j2 b/dist/templates/ovnkube-master.yaml.j2 index 59d47d1f10..01301b5cf5 100644 --- a/dist/templates/ovnkube-master.yaml.j2 +++ b/dist/templates/ovnkube-master.yaml.j2 @@ -138,6 +138,18 @@ spec: add: - NET_ADMIN {% endif %} + {% if ovn_dummy_gateway_bridge=="true" %} + lifecycle: + postStart: + exec: + command: + - /bin/bash + - -c + - | + #!/bin/bash + ovs-vsctl --may-exist add-br br-ex + ip a a dev 
br-ex 10.44.0.1/32 || /bin/true + {% endif %} {% else %} command: ["/root/ovnkube.sh", "ovn-master"] securityContext: @@ -259,6 +271,8 @@ spec: value: "{{ ovn_ssl_en }}" - name: OVN_GATEWAY_MODE value: "{{ ovn_gateway_mode }}" + - name: OVN_GATEWAY_OPTS + value: "{{ ovn_gateway_opts }}" - name: OVN_MULTICAST_ENABLE value: "{{ ovn_multicast_enable }}" - name: OVN_ACL_LOGGING_RATE_LIMIT diff --git a/dist/templates/ovnkube-node.yaml.j2 b/dist/templates/ovnkube-node.yaml.j2 index e66701c070..d05a97428a 100644 --- a/dist/templates/ovnkube-node.yaml.j2 +++ b/dist/templates/ovnkube-node.yaml.j2 @@ -39,7 +39,18 @@ spec: - name: ovnkube-node image: "{{ ovn_image | default('docker.io/ovnkube/ovn-daemonset:latest') }}" imagePullPolicy: "{{ ovn_image_pull_policy | default('IfNotPresent') }}" - + {% if ovn_dummy_gateway_bridge=="true" %} + lifecycle: + postStart: + exec: + command: + - /bin/bash + - -c + - | + #!/bin/bash + ovs-vsctl --may-exist add-br br-ex + ip a a dev br-ex 10.44.0.1/32 || /bin/true + {% endif %} command: ["/root/ovnkube.sh", "ovn-node"] securityContext: From 3c2b0d4057709de85b7219fe97d0ab800db8f586 Mon Sep 17 00:00:00 2001 From: Patryk Diak Date: Thu, 25 May 2023 19:22:04 +0200 Subject: [PATCH 71/90] Initialize EgressIP stopChan in cluster-manager Without it Stop() will always fail with `close of nil channel`. Additionally removed unused fields from FakeClusterManager. Signed-off-by: Patryk Diak --- go-controller/pkg/clustermanager/egressip_controller.go | 1 + .../pkg/clustermanager/fake_cluster_manager_test.go | 9 +-------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/go-controller/pkg/clustermanager/egressip_controller.go b/go-controller/pkg/clustermanager/egressip_controller.go index f2d8d0587f..ff358f77ca 100644 --- a/go-controller/pkg/clustermanager/egressip_controller.go +++ b/go-controller/pkg/clustermanager/egressip_controller.go @@ -413,6 +413,7 @@ func newEgressIPController(ovnClient *util.OVNClusterManagerClientset, wf *facto egressIPTotalTimeout: config.OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout, reachabilityCheckInterval: egressIPReachabilityCheckInterval, egressIPNodeHealthCheckPort: config.OVNKubernetesFeature.EgressIPNodeHealthCheckPort, + stopChan: make(chan struct{}), } eIPC.initRetryFramework() return eIPC diff --git a/go-controller/pkg/clustermanager/fake_cluster_manager_test.go b/go-controller/pkg/clustermanager/fake_cluster_manager_test.go index 805f9d181e..7456aeb539 100644 --- a/go-controller/pkg/clustermanager/fake_cluster_manager_test.go +++ b/go-controller/pkg/clustermanager/fake_cluster_manager_test.go @@ -1,8 +1,6 @@ package clustermanager import ( - "sync" - "github.com/onsi/gomega" egressip "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1" egressipfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1/apis/clientset/versioned/fake" @@ -17,8 +15,6 @@ type FakeClusterManager struct { fakeClient *util.OVNClusterManagerClientset watcher *factory.WatchFactory eIPC *egressIPClusterController - stopChan chan struct{} - wg *sync.WaitGroup fakeRecorder *record.FakeRecorder } @@ -52,14 +48,11 @@ func (o *FakeClusterManager) init() { err = o.watcher.Start() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - o.stopChan = make(chan struct{}) - o.wg = &sync.WaitGroup{} o.eIPC = newEgressIPController(o.fakeClient, o.watcher, o.fakeRecorder) gomega.Expect(err).NotTo(gomega.HaveOccurred()) } func (o *FakeClusterManager) shutdown() { o.watcher.Shutdown() - close(o.stopChan) - o.wg.Wait() + o.eIPC.Stop() } 
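The `close of nil channel` failure described in the patch above is a general Go property: closing a channel that was never allocated panics at runtime, so a Stop() method that closes a controller's stop channel is only safe if that channel is created when the controller is constructed. Below is a minimal, self-contained sketch of that pattern; the controller type, its fields, and the function names are illustrative stand-ins and not code taken from this repository.

package main

import (
    "fmt"
    "sync"
)

// controller is a stand-in for a component that owns a stop channel,
// similar in shape to the egress IP cluster controller touched above.
type controller struct {
    stopChan chan struct{}
    wg       sync.WaitGroup
}

// newController allocates stopChan up front, mirroring the fix: the channel
// must exist before Stop() ever tries to close it.
func newController() *controller {
    return &controller{stopChan: make(chan struct{})}
}

func (c *controller) Start() {
    c.wg.Add(1)
    go func() {
        defer c.wg.Done()
        <-c.stopChan // returns once Stop() closes the channel
    }()
}

func (c *controller) Stop() {
    // Would panic with "close of nil channel" if stopChan had never been initialized.
    close(c.stopChan)
    c.wg.Wait()
}

func main() {
    c := newController()
    c.Start()
    c.Stop()
    fmt.Println("stopped cleanly")
}

Removing the make(chan struct{}) call from newController reproduces the original panic on shutdown, which is the behavior the change above eliminates by initializing the channel in newEgressIPController.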
From 22d716eab356df63b4c23a2bda44cc69d1b094ca Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Fri, 5 May 2023 12:07:29 +0200 Subject: [PATCH 72/90] e2e: improve the error message of the getNetCIDRSubnet func Signed-off-by: Miguel Duarte Barroso --- test/e2e/multihoming_utils.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/multihoming_utils.go b/test/e2e/multihoming_utils.go index 98bddbb5b4..0103a7d2e2 100644 --- a/test/e2e/multihoming_utils.go +++ b/test/e2e/multihoming_utils.go @@ -29,7 +29,7 @@ func getNetCIDRSubnet(netCIDR string) (string, error) { } else if len(subStrings) == 2 { return netCIDR, nil } - return "", fmt.Errorf("invalid network cidr %s", netCIDR) + return "", fmt.Errorf("invalid network cidr: %q", netCIDR) } type networkAttachmentConfig struct { From dcff8cd8bda821190572498b97af1672619e6a8d Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Thu, 11 May 2023 17:42:31 +0200 Subject: [PATCH 73/90] multi-net policies, IPAM less: fail when converting invalid policies Network policies targeting IPAM less networks can only have `ipBlock` peers; while this behavior already existed, it now fails a lot earlier, when translating the multi-net policies into regular `NetworkPolicy`, thus making the flow more explicit, efficient, and readable. Signed-off-by: Miguel Duarte Barroso --- .../base_network_controller_multipolicy.go | 22 ++ ...ase_network_controller_multipolicy_test.go | 214 ++++++++++++++++++ .../ovn/base_network_controller_secondary.go | 20 +- 3 files changed, 252 insertions(+), 4 deletions(-) create mode 100644 go-controller/pkg/ovn/base_network_controller_multipolicy_test.go diff --git a/go-controller/pkg/ovn/base_network_controller_multipolicy.go b/go-controller/pkg/ovn/base_network_controller_multipolicy.go index cd2a8aab85..ebb57ad1e1 100644 --- a/go-controller/pkg/ovn/base_network_controller_multipolicy.go +++ b/go-controller/pkg/ovn/base_network_controller_multipolicy.go @@ -122,3 +122,25 @@ func convertMultiNetPolicyToNetPolicy(mpolicy *mnpapi.MultiNetworkPolicy) *knet. } return &policy } + +func (bnc *BaseNetworkController) convertMultiNetPolicyToNetPolicy(mpolicy *mnpapi.MultiNetworkPolicy) (*knet.NetworkPolicy, error) { + if !bnc.doesNetworkRequireIPAM() { + var peers []mnpapi.MultiNetworkPolicyPeer + for _, rule := range mpolicy.Spec.Ingress { + peers = append(peers, rule.From...) + } + for _, rule := range mpolicy.Spec.Egress { + peers = append(peers, rule.To...) + } + for _, peer := range peers { + if doesPeerRequireNetworkIPAM(peer) { + return nil, fmt.Errorf("invalid peer %v in multi-network policy %s; IPAM-less networks can only have `ipBlock` peers", peer, mpolicy.Name) + } + } + } + return convertMultiNetPolicyToNetPolicy(mpolicy), nil +} + +func doesPeerRequireNetworkIPAM(peer mnpapi.MultiNetworkPolicyPeer) bool { + return peer.PodSelector != nil || peer.NamespaceSelector != nil +} diff --git a/go-controller/pkg/ovn/base_network_controller_multipolicy_test.go b/go-controller/pkg/ovn/base_network_controller_multipolicy_test.go new file mode 100644 index 0000000000..4692b73fc7 --- /dev/null +++ b/go-controller/pkg/ovn/base_network_controller_multipolicy_test.go @@ -0,0 +1,214 @@ +package ovn + +import ( + "fmt" + + . "github.com/onsi/ginkgo" + . 
"github.com/onsi/gomega" + + netv1 "k8s.io/api/networking/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/k8snetworkplumbingwg/multi-networkpolicy/pkg/apis/k8s.cni.cncf.io/v1beta1" + netplumbersv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" +) + +var _ = Describe("convertMultiNetPolicyToNetPolicy", func() { + const policyName = "pol33" + + var nci *CommonNetworkControllerInfo + + BeforeEach(func() { + nci = &CommonNetworkControllerInfo{nbClient: nil} + }) + + It("translates an IPAM policy with namespace selectors", func() { + nInfo, err := util.ParseNADInfo(ipamNetAttachDef()) + Expect(err).NotTo(HaveOccurred()) + bnc := NewSecondaryLayer2NetworkController(nci, nInfo) + Expect(bnc.convertMultiNetPolicyToNetPolicy(multiNetPolicyWithNamespaceSelector(policyName))).To( + Equal( + &netv1.NetworkPolicy{ + ObjectMeta: metav1.ObjectMeta{Name: policyName}, + Spec: netv1.NetworkPolicySpec{ + Ingress: []netv1.NetworkPolicyIngressRule{ + { + From: []netv1.NetworkPolicyPeer{{NamespaceSelector: sameLabelsEverywhere()}}, + Ports: []netv1.NetworkPolicyPort{}, + }, + }, + Egress: []netv1.NetworkPolicyEgressRule{}, + PolicyTypes: []netv1.PolicyType{}, + }, + })) + }) + + It("translates an IPAM policy with pod selectors", func() { + nInfo, err := util.ParseNADInfo(ipamNetAttachDef()) + Expect(err).NotTo(HaveOccurred()) + bnc := NewSecondaryLayer2NetworkController(nci, nInfo) + Expect(bnc.convertMultiNetPolicyToNetPolicy(multiNetPolicyWithPodSelector(policyName))).To( + Equal( + &netv1.NetworkPolicy{ + ObjectMeta: metav1.ObjectMeta{Name: policyName}, + Spec: netv1.NetworkPolicySpec{ + Ingress: []netv1.NetworkPolicyIngressRule{ + { + From: []netv1.NetworkPolicyPeer{{PodSelector: sameLabelsEverywhere()}}, + Ports: []netv1.NetworkPolicyPort{}, + }, + }, + Egress: []netv1.NetworkPolicyEgressRule{}, + PolicyTypes: []netv1.PolicyType{}, + }, + })) + }) + + It("translates an IPAM policy with `ipBlock` selectors", func() { + nInfo, err := util.ParseNADInfo(ipamNetAttachDef()) + Expect(err).NotTo(HaveOccurred()) + bnc := NewSecondaryLayer2NetworkController(nci, nInfo) + Expect(bnc.convertMultiNetPolicyToNetPolicy(multiNetPolicyWithIPBlock())).To(Equal( + &netv1.NetworkPolicy{ + Spec: netv1.NetworkPolicySpec{ + Ingress: []netv1.NetworkPolicyIngressRule{ + { + From: []netv1.NetworkPolicyPeer{{IPBlock: &netv1.IPBlock{CIDR: "10.10.0.0/16"}}}, + Ports: []netv1.NetworkPolicyPort{}, + }, + }, + Egress: []netv1.NetworkPolicyEgressRule{}, + PolicyTypes: []netv1.PolicyType{}, + }, + }, + )) + }) + + It("translates an IPAM-less policy with `ipBlock` selectors", func() { + nInfo, err := util.ParseNADInfo(ipamlessNetAttachDef()) + Expect(err).NotTo(HaveOccurred()) + bnc := NewSecondaryLayer2NetworkController(nci, nInfo) + Expect(bnc.convertMultiNetPolicyToNetPolicy(multiNetPolicyWithIPBlock())).To( + Equal( + &netv1.NetworkPolicy{ + Spec: netv1.NetworkPolicySpec{ + Ingress: []netv1.NetworkPolicyIngressRule{ + { + From: []netv1.NetworkPolicyPeer{{IPBlock: &netv1.IPBlock{CIDR: "10.10.0.0/16"}}}, + Ports: []netv1.NetworkPolicyPort{}, + }, + }, + Egress: []netv1.NetworkPolicyEgressRule{}, + PolicyTypes: []netv1.PolicyType{}, + }, + }, + )) + }) + + It("*fails* to translate an IPAM-less policy with pod selector peers", func() { + nInfo, err := util.ParseNADInfo(ipamlessNetAttachDef()) + Expect(err).NotTo(HaveOccurred()) + bnc := NewSecondaryLayer2NetworkController(nci, nInfo) + _, err = 
bnc.convertMultiNetPolicyToNetPolicy(multiNetPolicyWithPodSelector(policyName)) + Expect(err).To( + MatchError( + MatchRegexp(fmt.Sprintf("invalid peer .* in multi-network policy %s; IPAM-less networks can only have `ipBlock` peers", policyName)))) + }) + + It("translates an IPAM-less policy with namespace selector peers", func() { + nInfo, err := util.ParseNADInfo(ipamlessNetAttachDef()) + Expect(err).NotTo(HaveOccurred()) + bnc := NewSecondaryLayer2NetworkController(nci, nInfo) + _, err = bnc.convertMultiNetPolicyToNetPolicy(multiNetPolicyWithNamespaceSelector(policyName)) + Expect(err).To(MatchError( + MatchRegexp(fmt.Sprintf("invalid peer .* in multi-network policy %s; IPAM-less networks can only have `ipBlock` peers", policyName)))) + }) +}) + +func sameLabelsEverywhere() *metav1.LabelSelector { + return &metav1.LabelSelector{ + MatchLabels: map[string]string{"George": "Costanza"}, + } +} + +func ipamNetAttachDef() *netplumbersv1.NetworkAttachmentDefinition { + return &netplumbersv1.NetworkAttachmentDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: "flatl2", + Namespace: "default", + }, + Spec: netplumbersv1.NetworkAttachmentDefinitionSpec{ + Config: `{ + "cniVersion": "0.4.0", + "name": "flatl2", + "netAttachDefName": "default/flatl2", + "topology": "layer2", + "type": "ovn-k8s-cni-overlay", + "subnets": "192.100.200.0/24" + }`, + }, + } +} + +func ipamlessNetAttachDef() *netplumbersv1.NetworkAttachmentDefinition { + return &netplumbersv1.NetworkAttachmentDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: "flatl2", + Namespace: "default", + }, + Spec: netplumbersv1.NetworkAttachmentDefinitionSpec{ + Config: `{ + "cniVersion": "0.4.0", + "name": "flatl2", + "netAttachDefName": "default/flatl2", + "topology": "layer2", + "type": "ovn-k8s-cni-overlay" + }`, + }, + } +} +func multiNetPolicyWithIPBlock() *v1beta1.MultiNetworkPolicy { + return &v1beta1.MultiNetworkPolicy{ + Spec: v1beta1.MultiNetworkPolicySpec{ + Ingress: []v1beta1.MultiNetworkPolicyIngressRule{ + { + From: []v1beta1.MultiNetworkPolicyPeer{ + { + IPBlock: &v1beta1.IPBlock{ + CIDR: "10.10.0.0/16", + }, + }, + }, + }, + }, + }, + } +} + +func multiNetPolicyWithPodSelector(policyName string) *v1beta1.MultiNetworkPolicy { + return &v1beta1.MultiNetworkPolicy{ + ObjectMeta: metav1.ObjectMeta{Name: policyName}, + Spec: v1beta1.MultiNetworkPolicySpec{ + Ingress: []v1beta1.MultiNetworkPolicyIngressRule{ + { + From: []v1beta1.MultiNetworkPolicyPeer{{PodSelector: sameLabelsEverywhere()}}, + }, + }, + }, + } +} + +func multiNetPolicyWithNamespaceSelector(policyName string) *v1beta1.MultiNetworkPolicy { + return &v1beta1.MultiNetworkPolicy{ + ObjectMeta: metav1.ObjectMeta{Name: policyName}, + Spec: v1beta1.MultiNetworkPolicySpec{ + Ingress: []v1beta1.MultiNetworkPolicyIngressRule{ + { + From: []v1beta1.MultiNetworkPolicyPeer{{NamespaceSelector: sameLabelsEverywhere()}}, + }, + }, + }, + } +} diff --git a/go-controller/pkg/ovn/base_network_controller_secondary.go b/go-controller/pkg/ovn/base_network_controller_secondary.go index f888df5fc3..a77dc9ae18 100644 --- a/go-controller/pkg/ovn/base_network_controller_secondary.go +++ b/go-controller/pkg/ovn/base_network_controller_secondary.go @@ -70,7 +70,10 @@ func (bsnc *BaseSecondaryNetworkController) AddSecondaryNetworkResourceCommon(ob return nil } - np := convertMultiNetPolicyToNetPolicy(mp) + np, err := bsnc.convertMultiNetPolicyToNetPolicy(mp) + if err != nil { + return err + } if err := bsnc.addNetworkPolicy(np); err != nil { klog.Infof("MultiNetworkPolicy add failed for %s/%s, 
will try again later: %v", mp.Namespace, mp.Name, err) @@ -114,7 +117,10 @@ func (bsnc *BaseSecondaryNetworkController) UpdateSecondaryNetworkResourceCommon newShouldApply := bsnc.shouldApplyMultiPolicy(newMp) if oldShouldApply { // this multi-netpol no longer applies to this network controller, delete it - np := convertMultiNetPolicyToNetPolicy(oldMp) + np, err := bsnc.convertMultiNetPolicyToNetPolicy(oldMp) + if err != nil { + return err + } if err := bsnc.deleteNetworkPolicy(np); err != nil { klog.Infof("MultiNetworkPolicy delete failed for %s/%s, will try again later: %v", oldMp.Namespace, oldMp.Name, err) @@ -123,7 +129,10 @@ func (bsnc *BaseSecondaryNetworkController) UpdateSecondaryNetworkResourceCommon } if newShouldApply { // now this multi-netpol applies to this network controller - np := convertMultiNetPolicyToNetPolicy(newMp) + np, err := bsnc.convertMultiNetPolicyToNetPolicy(newMp) + if err != nil { + return err + } if err := bsnc.addNetworkPolicy(np); err != nil { klog.Infof("MultiNetworkPolicy add failed for %s/%s, will try again later: %v", newMp.Namespace, newMp.Name, err) @@ -161,7 +170,10 @@ func (bsnc *BaseSecondaryNetworkController) DeleteSecondaryNetworkResourceCommon if !ok { return fmt.Errorf("could not cast %T object to *multinetworkpolicyapi.MultiNetworkPolicy", obj) } - np := convertMultiNetPolicyToNetPolicy(mp) + np, err := bsnc.convertMultiNetPolicyToNetPolicy(mp) + if err != nil { + return err + } // delete this policy regardless it applies to this network controller, in case of missing update event if err := bsnc.deleteNetworkPolicy(np); err != nil { klog.Infof("MultiNetworkPolicy delete failed for %s/%s, will try again later: %v", From 2482921865f778b076d3135756de708ee588651f Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Thu, 18 May 2023 10:38:39 +0200 Subject: [PATCH 74/90] multi-net policies, IPAM less: allow ipBlock peers Signed-off-by: Miguel Duarte Barroso --- go-controller/pkg/ovn/base_network_controller_namespace.go | 5 ++--- go-controller/pkg/ovn/base_network_controller_pods.go | 5 ++++- go-controller/pkg/ovn/base_network_controller_secondary.go | 6 ------ .../pkg/ovn/base_secondary_layer2_network_controller.go | 2 +- 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/go-controller/pkg/ovn/base_network_controller_namespace.go b/go-controller/pkg/ovn/base_network_controller_namespace.go index b20f1cfaf8..55c6dae160 100644 --- a/go-controller/pkg/ovn/base_network_controller_namespace.go +++ b/go-controller/pkg/ovn/base_network_controller_namespace.go @@ -66,9 +66,8 @@ func getNamespaceAddrSetDbIDs(namespaceName, controller string) *libovsdbops.DbO func (bnc *BaseNetworkController) WatchNamespaces() error { if bnc.IsSecondary() { // For secondary networks, we don't have to watch namespace events if - // multi-network policy support is not enabled. We don't support - // multi-network policy for IPAM-less secondary networks either. - if !util.IsMultiNetworkPoliciesSupportEnabled() || !bnc.doesNetworkRequireIPAM() { + // multi-network policy support is not enabled. 
+ if !util.IsMultiNetworkPoliciesSupportEnabled() { return nil } } diff --git a/go-controller/pkg/ovn/base_network_controller_pods.go b/go-controller/pkg/ovn/base_network_controller_pods.go index 87e28806b5..b20f5bed1e 100644 --- a/go-controller/pkg/ovn/base_network_controller_pods.go +++ b/go-controller/pkg/ovn/base_network_controller_pods.go @@ -478,7 +478,10 @@ func (bnc *BaseNetworkController) podExpectedInLogicalCache(pod *kapi.Pod) bool if err != nil { return false } - return !util.PodWantsHostNetwork(pod) && !bnc.lsManager.IsNonHostSubnetSwitch(switchName) && !util.PodCompleted(pod) + return !util.PodWantsHostNetwork(pod) && + !(bnc.lsManager.IsNonHostSubnetSwitch(switchName) && + bnc.doesNetworkRequireIPAM()) && + !util.PodCompleted(pod) } func (bnc *BaseNetworkController) getExpectedSwitchName(pod *kapi.Pod) (string, error) { diff --git a/go-controller/pkg/ovn/base_network_controller_secondary.go b/go-controller/pkg/ovn/base_network_controller_secondary.go index a77dc9ae18..3239eb2f72 100644 --- a/go-controller/pkg/ovn/base_network_controller_secondary.go +++ b/go-controller/pkg/ovn/base_network_controller_secondary.go @@ -532,12 +532,6 @@ func (bsnc *BaseSecondaryNetworkController) WatchMultiNetworkPolicy() error { return nil } - // if this network does not have ipam, network policy is not supported. - if !bsnc.doesNetworkRequireIPAM() { - klog.Infof("Network policy is not supported on network %s", bsnc.GetNetworkName()) - return nil - } - if bsnc.policyHandler != nil { return nil } diff --git a/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go b/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go index 74af5e3a1b..e2112cda32 100644 --- a/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go +++ b/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go @@ -169,7 +169,7 @@ func (oc *BaseSecondaryLayer2NetworkController) initRetryFramework() { // For secondary networks, we don't have to watch namespace events if // multi-network policy support is not enabled. We don't support // multi-network policy for IPAM-less secondary networks either. 
- if util.IsMultiNetworkPoliciesSupportEnabled() && oc.doesNetworkRequireIPAM() { + if util.IsMultiNetworkPoliciesSupportEnabled() { oc.retryNamespaces = oc.newRetryFramework(factory.NamespaceType) oc.retryNetworkPolicies = oc.newRetryFramework(factory.MultiNetworkPolicyType) } From 5447059f72f960f371f8de32d1bc5021228f95e2 Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Fri, 5 May 2023 12:06:49 +0200 Subject: [PATCH 75/90] multi-net policies, IPAM less, e2e: add test for IPAMless flat L2 policies Signed-off-by: Miguel Duarte Barroso --- test/e2e/multihoming.go | 44 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/test/e2e/multihoming.go b/test/e2e/multihoming.go index 166e80a1d5..c870b9d554 100644 --- a/test/e2e/multihoming.go +++ b/test/e2e/multihoming.go @@ -616,6 +616,7 @@ var _ = Describe("Multi Homing", func() { Context("multi-network policies", func() { const ( generatedNamespaceNamePrefix = "pepe" + blockedServerStaticIP = "192.168.200.30" ) var extraNamespace *v1.Namespace @@ -684,10 +685,12 @@ var _ = Describe("Multi Homing", func() { By("asserting the server pod has an IP from the configured range") serverIP, err := podIPForAttachment(cs, serverPodConfig.namespace, serverPodConfig.name, netConfig.name, 0) Expect(err).NotTo(HaveOccurred()) - By(fmt.Sprintf("asserting the server pod IP %v is from the configured range %v/%v", serverIP, netConfig.cidr, netPrefixLengthPerNode)) - subnet, err := getNetCIDRSubnet(netConfig.cidr) - Expect(err).NotTo(HaveOccurred()) - Expect(inRange(subnet, serverIP)).To(Succeed()) + if netConfig.cidr != "" { + By(fmt.Sprintf("asserting the server pod IP %v is from the configured range %v/%v", serverIP, netConfig.cidr, netPrefixLengthPerNode)) + subnet, err := getNetCIDRSubnet(netConfig.cidr) + Expect(err).NotTo(HaveOccurred()) + Expect(inRange(subnet, serverIP)).To(Succeed()) + } if doesPolicyFeatAnIPBlock(policy) { blockedIP, err := podIPForAttachment(cs, f.Namespace.Name, blockedClientPodConfig.name, netConfig.name, 0) @@ -1022,6 +1025,39 @@ var _ = Describe("Multi Homing", func() { port, ), ), + + table.Entry( + "for an IPAMless pure L2 overlay when the multi-net policy describes the allow-list using IPBlock", + networkAttachmentConfig{ + name: secondaryNetworkName, + topology: "layer2", + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName, IPRequest: []string{clientIP}}}, + name: allowedClient(clientPodName), + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName, IPRequest: []string{blockedServerStaticIP + "/24"}}}, + name: blockedClient(clientPodName), + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName, IPRequest: []string{staticServerIP}}}, + name: podName, + containerCmd: httpServerContainerCmd(port), + labels: map[string]string{"app": "stuff-doer"}, + }, + multiNetIngressLimitingIPBlockPolicy( + secondaryNetworkName, + metav1.LabelSelector{ + MatchLabels: map[string]string{"app": "stuff-doer"}, + }, + mnpapi.IPBlock{ + CIDR: "192.168.200.0/24", + Except: []string{blockedServerStaticIP}, + }, + port, + ), + ), ) }) }) From 3b5c3d735e0b72e66e35846bfa5b309e4c5255fa Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Tue, 23 May 2023 17:59:43 +0200 Subject: [PATCH 76/90] multi-net policies, IPAM less: only warn for IPAMed networks Signed-off-by: Miguel Duarte Barroso --- go-controller/pkg/ovn/base_network_controller_namespace.go | 4 
++++ 1 file changed, 4 insertions(+) diff --git a/go-controller/pkg/ovn/base_network_controller_namespace.go b/go-controller/pkg/ovn/base_network_controller_namespace.go index 55c6dae160..e8ef6c8f7a 100644 --- a/go-controller/pkg/ovn/base_network_controller_namespace.go +++ b/go-controller/pkg/ovn/base_network_controller_namespace.go @@ -350,6 +350,10 @@ func (bnc *BaseNetworkController) updateNamespaceAclLogging(ns, aclAnnotation st } func (bnc *BaseNetworkController) getAllNamespacePodAddresses(ns string) []net.IP { + if !bnc.doesNetworkRequireIPAM() { + return nil + } + var ips []net.IP // Get all the pods in the namespace and append their IP to the address_set existingPods, err := bnc.watchFactory.GetPods(ns) From 59fbb7bad19b77b4bf4c692273c8ea8ea968a7d1 Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Fri, 26 May 2023 16:33:25 +0200 Subject: [PATCH 77/90] docs, multi-homing, policy: document IPAM-less policies restrictions Signed-off-by: Miguel Duarte Barroso --- docs/multi-homing.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/multi-homing.md b/docs/multi-homing.md index 2bd02f2921..a17a698628 100644 --- a/docs/multi-homing.md +++ b/docs/multi-homing.md @@ -293,9 +293,10 @@ Please note the `MultiNetworkPolicy` has the **exact same** API of the native `networking.k8s.io/v1` `NetworkPolicy`object; check its documentation for more information. -**Note:** It is currently **required** for the `net-attach-def`s referred to by -the `k8s.v1.cni.cncf.io/policy-for` annotation to have the `subnets` attribute -in its `spec.config` defined. +**Note:** `net-attach-def`s referred to by the `k8s.v1.cni.cncf.io/policy-for` +annotation without the subnet attribute defined are possible if the policy +**only features** `ipBlock` peers. If the `net-attach-def` features the +`subnet` attribute, it can also feature `namespaceSelectors` and `podSelectors`. ## Limitations OVN-K currently does **not** support: From de495c2b07bc1cafad205a6a810b72033363ccca Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Mon, 29 May 2023 13:23:08 +0200 Subject: [PATCH 78/90] e2e, kind: parametrize OCI_BIN when creating secondary interfaces Signed-off-by: Miguel Duarte Barroso --- contrib/kind.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/kind.sh b/contrib/kind.sh index e451cc6d2f..04e61f4a05 100755 --- a/contrib/kind.sh +++ b/contrib/kind.sh @@ -1066,11 +1066,11 @@ docker_create_second_interface() { echo "adding second interfaces to nodes" # Create the network as dual stack, regardless of the type of the deployment. Ignore if already exists. - docker network create --ipv6 --driver=bridge kindexgw --subnet=172.19.0.0/16 --subnet=fc00:f853:ccd:e798::/64 || true + "$OCI_BIN" network create --ipv6 --driver=bridge kindexgw --subnet=172.19.0.0/16 --subnet=fc00:f853:ccd:e798::/64 || true KIND_NODES=$(kind get nodes --name "${KIND_CLUSTER_NAME}") for n in $KIND_NODES; do - docker network connect kindexgw "$n" + "$OCI_BIN" network connect kindexgw "$n" done } From db413958d716ee7c27398b659197865e6daa6358 Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Mon, 29 May 2023 13:27:54 +0200 Subject: [PATCH 79/90] multi-homing, e2e tests: extra interface for the kind deployment When on a multi-homing lane create a secondary network **without** external access (the ovnkube-node gateway init code would pick an interface with a default route as the interface to create `br-ex` on top of). 
Signed-off-by: Miguel Duarte Barroso --- contrib/kind.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/contrib/kind.sh b/contrib/kind.sh index 04e61f4a05..24b4fc99ea 100755 --- a/contrib/kind.sh +++ b/contrib/kind.sh @@ -1074,6 +1074,20 @@ docker_create_second_interface() { done } +docker_create_second_disconnected_interface() { + echo "adding second interfaces to nodes" + local bridge_name="${1:-kindexgw}" + echo "bridge: $bridge_name" + + # Create the network without subnets; ignore if already exists. + "$OCI_BIN" network create --internal --driver=bridge "$bridge_name" || true + + KIND_NODES=$(kind get nodes --name "${KIND_CLUSTER_NAME}") + for n in $KIND_NODES; do + "$OCI_BIN" network connect "$bridge_name" "$n" + done +} + sleep_until_pods_settle() { echo "Pods are all up, allowing things settle for 30 seconds..." sleep 30 @@ -1169,6 +1183,7 @@ fi if [ "$ENABLE_MULTI_NET" == true ]; then install_multus install_mpolicy_crd + docker_create_second_disconnected_interface "underlay" # localnet scenarios require an extra interface fi kubectl_wait_pods sleep_until_pods_settle From 18d28081e51e88d45ec4e523844ffd504bb4f5cf Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Thu, 25 May 2023 17:02:01 +0200 Subject: [PATCH 80/90] multi-homing, localnet, tests: configure underlay This commit adds code to configure the cluster underlay to provide east/west connectivity between pods using a localnet secondary network, something which will allows us to test the underlay topology - since up to now we weren't actually testing anything meaningful: since all pods using a secondary network were scheduled in the same node, the underlay was not being used. Signed-off-by: Miguel Duarte Barroso --- test/e2e/localnet-underlay.go | 163 ++++++++++++++++++++++++++++++++++ test/e2e/multihoming.go | 36 +++++--- 2 files changed, 189 insertions(+), 10 deletions(-) create mode 100644 test/e2e/localnet-underlay.go diff --git a/test/e2e/localnet-underlay.go b/test/e2e/localnet-underlay.go new file mode 100644 index 0000000000..97e9edaa3f --- /dev/null +++ b/test/e2e/localnet-underlay.go @@ -0,0 +1,163 @@ +package e2e + +import ( + "context" + "fmt" + "strings" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + clientset "k8s.io/client-go/kubernetes" +) + +const ( + bridgeName = "ovsbr1" + add = "add-br" + del = "del-br" +) + +func setupUnderlay(ovsPods []v1.Pod, portName string, nadConfig networkAttachmentConfig) error { + for _, ovsPod := range ovsPods { + if err := addOVSBridge(ovsPod.Name, bridgeName); err != nil { + return err + } + + if nadConfig.vlanID > 0 { + if err := ovsEnableVLANAccessPort(ovsPod.Name, bridgeName, portName, nadConfig.vlanID); err != nil { + return err + } + } else { + if err := ovsAttachPortToBridge(ovsPod.Name, bridgeName, portName); err != nil { + return err + } + } + + if err := configureBridgeMappings( + ovsPod.Name, + defaultNetworkBridgeMapping(), + bridgeMapping(nadConfig.attachmentName(), bridgeName), + ); err != nil { + return err + } + } + return nil +} + +func teardownUnderlay(ovsPods []v1.Pod) error { + for _, ovsPod := range ovsPods { + if err := removeOVSBridge(ovsPod.Name, bridgeName); err != nil { + return err + } + } + return nil +} + +func ovsPods(clientSet clientset.Interface) []v1.Pod { + const ( + ovnKubernetesNamespace = "ovn-kubernetes" + ovsNodeLabel = "app=ovs-node" + ) + pods, err := clientSet.CoreV1().Pods(ovnKubernetesNamespace).List( + context.Background(), + metav1.ListOptions{LabelSelector: ovsNodeLabel}, + 
) + if err != nil { + return nil + } + return pods.Items +} + +func addOVSBridge(ovnNodeName string, bridgeName string) error { + _, err := runCommand(ovsBridgeCommand(ovnNodeName, add, bridgeName)...) + if err != nil { + return fmt.Errorf("failed to ADD OVS bridge %s: %v", bridgeName, err) + } + return nil +} + +func removeOVSBridge(ovnNodeName string, bridgeName string) error { + _, err := runCommand(ovsBridgeCommand(ovnNodeName, del, bridgeName)...) + if err != nil { + return fmt.Errorf("failed to DELETE OVS bridge %s: %v", bridgeName, err) + } + return nil +} + +func ovsBridgeCommand(ovnNodeName string, addOrDeleteCmd string, bridgeName string) []string { + return []string{ + "kubectl", "-n", "ovn-kubernetes", "exec", ovnNodeName, "--", + "ovs-vsctl", addOrDeleteCmd, bridgeName, + } +} + +func ovsAttachPortToBridge(ovsNodeName string, bridgeName string, portName string) error { + cmd := []string{ + "kubectl", "-n", "ovn-kubernetes", "exec", ovsNodeName, "--", + "ovs-vsctl", "add-port", bridgeName, portName, + } + + if _, err := runCommand(cmd...); err != nil { + return fmt.Errorf("failed to add port %s to OVS bridge %s: %v", portName, bridgeName, err) + } + + return nil +} + +func ovsEnableVLANAccessPort(ovsNodeName string, bridgeName string, portName string, vlanID int) error { + cmd := []string{ + "kubectl", "-n", "ovn-kubernetes", "exec", ovsNodeName, "--", + "ovs-vsctl", "add-port", bridgeName, portName, fmt.Sprintf("tag=%d", vlanID), "vlan_mode=access", + } + + if _, err := runCommand(cmd...); err != nil { + return fmt.Errorf("failed to add port %s to OVS bridge %s: %v", portName, bridgeName, err) + } + + return nil +} + +type BridgeMapping struct { + physnet string + ovsBridge string +} + +func (bm BridgeMapping) String() string { + return fmt.Sprintf("%s:%s", bm.physnet, bm.ovsBridge) +} + +type BridgeMappings []BridgeMapping + +func (bms BridgeMappings) String() string { + return strings.Join(Map(bms, func(bm BridgeMapping) string { return bm.String() }), ",") +} + +func Map[T, V any](items []T, fn func(T) V) []V { + result := make([]V, len(items)) + for i, t := range items { + result[i] = fn(t) + } + return result +} + +func configureBridgeMappings(ovnNodeName string, mappings ...BridgeMapping) error { + mappingsString := fmt.Sprintf("external_ids:ovn-bridge-mappings=%s", BridgeMappings(mappings).String()) + cmd := []string{"kubectl", "-n", "ovn-kubernetes", "exec", ovnNodeName, + "--", "ovs-vsctl", "set", "open", ".", mappingsString, + } + _, err := runCommand(cmd...) 
+ return err +} + +func defaultNetworkBridgeMapping() BridgeMapping { + return BridgeMapping{ + physnet: "physnet", + ovsBridge: "breth0", + } +} + +func bridgeMapping(physnet, ovsBridge string) BridgeMapping { + return BridgeMapping{ + physnet: physnet, + ovsBridge: ovsBridge, + } +} diff --git a/test/e2e/multihoming.go b/test/e2e/multihoming.go index c870b9d554..d028546863 100644 --- a/test/e2e/multihoming.go +++ b/test/e2e/multihoming.go @@ -266,6 +266,17 @@ var _ = Describe("Multi Homing", func() { clientPodConfig.namespace = f.Namespace.Name serverPodConfig.namespace = f.Namespace.Name + if netConfig.topology == "localnet" { + nodes := ovsPods(cs) + Expect(nodes).NotTo(BeEmpty()) + defer func() { + Expect(teardownUnderlay(nodes)).To(Succeed()) + }() + + const secondaryInterfaceName = "eth1" + Expect(setupUnderlay(nodes, secondaryInterfaceName, netConfig)).To(Succeed()) + } + By("creating the attachment configuration") _, err := nadClient.NetworkAttachmentDefinitions(f.Namespace.Name).Create( context.Background(), @@ -511,16 +522,17 @@ var _ = Describe("Multi Homing", func() { }, ), table.Entry( - "can communicate over an Localnet secondary network when the pods are scheduled on the same node", + "can communicate over an Localnet secondary network when the pods are scheduled on different nodes", networkAttachmentConfig{ name: secondaryNetworkName, topology: "localnet", cidr: secondaryLocalnetNetworkCIDR, + vlanID: localnetVLANID, }, podConfiguration{ attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, name: clientPodName, - nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, + nodeSelector: map[string]string{nodeHostnameKey: workerOneNodeName}, }, podConfiguration{ attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, @@ -530,15 +542,16 @@ var _ = Describe("Multi Homing", func() { }, ), table.Entry( - "can communicate over an Localnet secondary network without IPAM when the pods are scheduled on the same node", + "can communicate over an Localnet secondary network without IPAM when the pods are scheduled on different nodes", networkAttachmentConfig{ name: secondaryNetworkName, topology: "localnet", + vlanID: localnetVLANID, }, podConfiguration{ attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, name: clientPodName, - nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, + nodeSelector: map[string]string{nodeHostnameKey: workerOneNodeName}, isPrivileged: true, }, podConfiguration{ @@ -550,10 +563,11 @@ var _ = Describe("Multi Homing", func() { }, ), table.Entry( - "can communicate over an localnet secondary network without IPAM when the pods are scheduled on the same node, with static IPs configured via network selection elements", + "can communicate over an localnet secondary network without IPAM when the pods are scheduled on different nodes, with static IPs configured via network selection elements", networkAttachmentConfig{ name: secondaryNetworkName, topology: "localnet", + vlanID: localnetVLANID, }, podConfiguration{ attachments: []nadapi.NetworkSelectionElement{{ @@ -561,7 +575,7 @@ var _ = Describe("Multi Homing", func() { IPRequest: []string{clientIP}, }}, name: clientPodName, - nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, + nodeSelector: map[string]string{nodeHostnameKey: workerOneNodeName}, }, podConfiguration{ attachments: []nadapi.NetworkSelectionElement{{ @@ -574,16 +588,17 @@ var _ = Describe("Multi Homing", func() { }, ), table.Entry( - "can 
communicate over an localnet secondary network with an IPv6 subnet when pods are scheduled on the same node", + "can communicate over an localnet secondary network with an IPv6 subnet when pods are scheduled on different nodes", networkAttachmentConfig{ name: secondaryNetworkName, topology: "localnet", cidr: secondaryIPv6CIDR, + vlanID: localnetVLANID, }, podConfiguration{ attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, name: clientPodName, - nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, + nodeSelector: map[string]string{nodeHostnameKey: workerOneNodeName}, }, podConfiguration{ attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, @@ -593,16 +608,17 @@ var _ = Describe("Multi Homing", func() { }, ), table.Entry( - "can communicate over an localnet secondary network with a dual stack configuration when pods are scheduled on the same node", + "can communicate over an localnet secondary network with a dual stack configuration when pods are scheduled on different nodes", networkAttachmentConfig{ name: secondaryNetworkName, topology: "localnet", cidr: strings.Join([]string{secondaryLocalnetNetworkCIDR, secondaryIPv6CIDR}, ","), + vlanID: localnetVLANID, }, podConfiguration{ attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, name: clientPodName, - nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, + nodeSelector: map[string]string{nodeHostnameKey: workerOneNodeName}, }, podConfiguration{ attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, From 6d64505816da726cb2ad2ef81971e20be195739a Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Tue, 30 May 2023 12:31:31 +0200 Subject: [PATCH 81/90] multi-net, e2e, kind: remove shift instruction The shift instruction when setting the multi-network flag is not required. Signed-off-by: Miguel Duarte Barroso --- contrib/kind.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/contrib/kind.sh b/contrib/kind.sh index 24b4fc99ea..d85128964b 100755 --- a/contrib/kind.sh +++ b/contrib/kind.sh @@ -321,8 +321,7 @@ parse_args() { ;; --isolated ) OVN_ISOLATED=true ;; - -mne | --multi-network-enable ) shift - ENABLE_MULTI_NET=true + -mne | --multi-network-enable ) ENABLE_MULTI_NET=true ;; --delete ) delete exit From 3d94c34aa810c7287cd8a89403da3abc724b2a5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Wed, 31 May 2023 19:02:37 +0000 Subject: [PATCH 82/90] Increase unit test timeout to 20m for ovn pkg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On a normal, green unit test run, the ovn package is already close to the 10m timeout for running the unit tests, with the eventual timeout actually happening sometimes: ok github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn 593.856s Signed-off-by: Jaime Caamaño Ruiz --- go-controller/hack/test-go.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/go-controller/hack/test-go.sh b/go-controller/hack/test-go.sh index 9e39778b43..fa691a3c10 100755 --- a/go-controller/hack/test-go.sh +++ b/go-controller/hack/test-go.sh @@ -49,6 +49,10 @@ function testrun { if [ !
-z "${COVERALLS:-}" ]; then args="${args} -test.coverprofile=${idx}.coverprofile " fi + if [[ " ${big_pkgs[@]} " =~ " $pkg " ]]; then + echo "Increasing timeout to 20m for package ${pkg}" + args="${args} -test.timeout=20m" + fi if grep -q -r "ginkgo" ."${path}"; then prefix=$(echo "${path}" | cut -c 2- | sed 's,/,_,g') ginkgoargs="-ginkgo.v ${ginkgo_focus} -ginkgo.reportFile ${TEST_REPORT_DIR}/junit-${prefix}.xml" @@ -64,6 +68,9 @@ function testrun { # These packages requires root for network namespace manipulation in unit tests root_pkgs=("github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node") +# These packages are big and require more than the 10m default to run the unit tests +big_pkgs=("github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn") + i=0 for pkg in ${PKGS}; do testrun "${i}" "${pkg}" From 29792cbb1bbbd92aac8d560ce2a5d38675aced72 Mon Sep 17 00:00:00 2001 From: Jacob Tanenbaum Date: Mon, 22 May 2023 10:49:36 -0400 Subject: [PATCH 83/90] rename podLister to localPodLister for hybrid overlay when we create a hybrid overlay Node we pass LocalPodInformer which only knows about Pods local to the node they are running on. The hybrid overlay was assuming it was listening to all pods and manually filtering out those that are not on the local node. This commit fixes that assumption and renames the variables to reflect that they are localPodInformers and localPodListers Signed-off-by: Jacob Tanenbaum --- .../hybrid-overlay/pkg/controller/node.go | 14 ++++------- .../pkg/controller/node_linux.go | 24 +++++++++---------- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/go-controller/hybrid-overlay/pkg/controller/node.go b/go-controller/hybrid-overlay/pkg/controller/node.go index 3d551ba257..b9b81da1de 100644 --- a/go-controller/hybrid-overlay/pkg/controller/node.go +++ b/go-controller/hybrid-overlay/pkg/controller/node.go @@ -87,14 +87,14 @@ func NewNode( kube kube.Interface, nodeName string, nodeInformer cache.SharedIndexInformer, - podInformer cache.SharedIndexInformer, + localPodInformer cache.SharedIndexInformer, eventHandlerCreateFunction informer.EventHandlerCreateFunction, ) (*Node, error) { nodeLister := listers.NewNodeLister(nodeInformer.GetIndexer()) - podLister := listers.NewPodLister(podInformer.GetIndexer()) + localPodLister := listers.NewPodLister(localPodInformer.GetIndexer()) - controller, err := newNodeController(kube, nodeName, nodeLister, podLister) + controller, err := newNodeController(kube, nodeName, nodeLister, localPodLister) if err != nil { return nil, err } @@ -119,15 +119,12 @@ func NewNode( if err != nil { return nil, err } - n.podEventHandler, err = eventHandlerCreateFunction("pod", podInformer, + n.podEventHandler, err = eventHandlerCreateFunction("pod", localPodInformer, func(obj interface{}) error { pod, ok := obj.(*kapi.Pod) if !ok { return fmt.Errorf("object is not a pod") } - if pod.Spec.NodeName != nodeName { - return nil - } return n.controller.AddPod(pod) }, func(obj interface{}) error { @@ -135,9 +132,6 @@ func NewNode( if !ok { return fmt.Errorf("object is not a pod") } - if pod.Spec.NodeName != nodeName { - return nil - } return n.controller.DeletePod(pod) }, podChanged, diff --git a/go-controller/hybrid-overlay/pkg/controller/node_linux.go b/go-controller/hybrid-overlay/pkg/controller/node_linux.go index b51a578829..da62c32b0f 100644 --- a/go-controller/hybrid-overlay/pkg/controller/node_linux.go +++ b/go-controller/hybrid-overlay/pkg/controller/node_linux.go @@ -53,8 +53,8 @@ type NodeController struct { // channel to indicate 
we need to update flows immediately flowChan chan struct{} - nodeLister listers.NodeLister - podLister listers.PodLister + nodeLister listers.NodeLister + localPodLister listers.PodLister } // newNodeController returns a node handler that listens for node events @@ -66,18 +66,18 @@ func newNodeController( _ kube.Interface, nodeName string, nodeLister listers.NodeLister, - podLister listers.PodLister, + localPodLister listers.PodLister, ) (nodeController, error) { node := &NodeController{ - nodeName: nodeName, - initState: new(uint32), - vxlanPort: uint16(config.HybridOverlay.VXLANPort), - flowCache: make(map[string]*flowCacheEntry), - flowMutex: sync.Mutex{}, - flowChan: make(chan struct{}, 1), - nodeLister: nodeLister, - podLister: podLister, + nodeName: nodeName, + initState: new(uint32), + vxlanPort: uint16(config.HybridOverlay.VXLANPort), + flowCache: make(map[string]*flowCacheEntry), + flowMutex: sync.Mutex{}, + flowChan: make(chan struct{}, 1), + nodeLister: nodeLister, + localPodLister: localPodLister, } atomic.StoreUint32(node.initState, hotypes.InitialStartup) return node, nil @@ -274,7 +274,7 @@ func (n *NodeController) AddNode(node *kapi.Node) error { err = n.hybridOverlayNodeUpdate(node) } if atomic.LoadUint32(n.initState) == hotypes.DistributedRouterInitialized { - pods, err := n.podLister.List(labels.Everything()) + pods, err := n.localPodLister.List(labels.Everything()) if err != nil { return fmt.Errorf("cannot fully initialize node %s for hybrid overlay, cannot list pods: %v", n.nodeName, err) } From 6ef910750a24cc7991b2c17521c88a2e87310950 Mon Sep 17 00:00:00 2001 From: Jacob Tanenbaum Date: Mon, 22 May 2023 10:55:53 -0400 Subject: [PATCH 84/90] code in the AddPod() for hybrid overlay that inits the Node not required This code in AddPod() is not required and duplicates effort for the hybrid overlay: code was added to AddNode() so that, once the drIP and drMAC are set, it loops through all pods and initializes them. This check is not required.
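For reference, the consolidated flow can be pictured with a small self-contained Go sketch. The controller type, field names, and values below are invented stand-ins for illustration; only the control flow mirrors the actual change: AddNode initializes every local pod once the DR IP/MAC are known, and AddPod merely validates that state.

package main

import "fmt"

// controller is a toy stand-in for the hybrid overlay NodeController.
type controller struct {
	drIP, drMAC string
	pods        []string
}

// addNode is the single initialization path: once the distributed
// router IP/MAC are known, it walks every local pod exactly once.
func (c *controller) addNode() error {
	c.drIP, c.drMAC = "10.0.0.1", "0a:58:0a:00:00:01"
	for _, p := range c.pods {
		if err := c.addPod(p); err != nil {
			return err
		}
	}
	return nil
}

// addPod no longer re-runs node initialization; it only validates
// that the node-level state it depends on is already in place.
func (c *controller) addPod(pod string) error {
	if c.drIP == "" || c.drMAC == "" {
		return fmt.Errorf("empty DR IP/MAC while adding pod %s", pod)
	}
	fmt.Println("initialized pod", pod)
	return nil
}

func main() {
	c := &controller{pods: []string{"pod-a", "pod-b"}}
	if err := c.addNode(); err != nil {
		fmt.Println(err)
	}
}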
Signed-off-by: Jacob Tanenbaum --- .../hybrid-overlay/pkg/controller/node_linux.go | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/go-controller/hybrid-overlay/pkg/controller/node_linux.go b/go-controller/hybrid-overlay/pkg/controller/node_linux.go index da62c32b0f..8972f23220 100644 --- a/go-controller/hybrid-overlay/pkg/controller/node_linux.go +++ b/go-controller/hybrid-overlay/pkg/controller/node_linux.go @@ -112,16 +112,6 @@ func (n *NodeController) AddPod(pod *kapi.Pod) error { // if the IP/MAC or Annotations have changed ignoreLearn := true - if atomic.LoadUint32(n.initState) == hotypes.InitialStartup { - node, err := n.nodeLister.Get(n.nodeName) - if err != nil { - return fmt.Errorf("hybrid overlay not initialized on %s, and failed to get node data: %v", - n.nodeName, err) - } - if err = n.EnsureHybridOverlayBridge(node); err != nil { - return fmt.Errorf("failed to ensure hybrid overlay in pod handler: %v", err) - } - } if n.drMAC == nil || n.drIP == nil { return fmt.Errorf("empty values for DR MAC: %s or DR IP: %s on node %s", n.drMAC, n.drIP, n.nodeName) } @@ -278,7 +268,6 @@ func (n *NodeController) AddNode(node *kapi.Node) error { if err != nil { return fmt.Errorf("cannot fully initialize node %s for hybrid overlay, cannot list pods: %v", n.nodeName, err) } - for _, pod := range pods { err := n.AddPod(pod) if err != nil { From 5719bd891b75f3eb6692a070b33f62e2a4f309bc Mon Sep 17 00:00:00 2001 From: Surya Seetharaman Date: Wed, 24 May 2023 11:26:46 +0200 Subject: [PATCH 85/90] EIP: Move relevant metrics to CM This is the metrics refactor for interconnect following the work done in https://github.com/ovn-org/ovn-kubernetes/pull/3386. Signed-off-by: Surya Seetharaman --- go-controller/pkg/metrics/cluster_manager.go | 50 ++++++++++++++++++++ go-controller/pkg/metrics/master.go | 40 ---------------- 2 files changed, 50 insertions(+), 40 deletions(-) diff --git a/go-controller/pkg/metrics/cluster_manager.go b/go-controller/pkg/metrics/cluster_manager.go index c00c9b63fc..dc69a50a17 100644 --- a/go-controller/pkg/metrics/cluster_manager.go +++ b/go-controller/pkg/metrics/cluster_manager.go @@ -54,6 +54,30 @@ var metricV6AllocatedHostSubnetCount = prometheus.NewGauge(prometheus.GaugeOpts{ Help: "The total number of v6 host subnets currently allocated", }) +/** EgressIP metrics recorded from cluster-manager begins**/ +var metricEgressIPCount = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: MetricOvnkubeNamespace, + Subsystem: MetricOvnkubeSubsystemClusterManager, + Name: "num_egress_ips", + Help: "The number of defined egress IP addresses", +}) + +var metricEgressIPNodeUnreacheableCount = prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: MetricOvnkubeNamespace, + Subsystem: MetricOvnkubeSubsystemClusterManager, + Name: "egress_ips_node_unreachable_total", + Help: "The total number of times assigned egress IP(s) were unreachable"}, +) + +var metricEgressIPRebalanceCount = prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: MetricOvnkubeNamespace, + Subsystem: MetricOvnkubeSubsystemClusterManager, + Name: "egress_ips_rebalance_total", + Help: "The total number of times assigned egress IP(s) needed to be moved to a different node"}, +) + +/** EgressIP metrics recorded from cluster-manager ends**/ + // RegisterClusterManagerBase registers ovnkube cluster manager base metrics with the Prometheus registry. // This function should only be called once. 
func RegisterClusterManagerBase() { @@ -88,6 +112,11 @@ func RegisterClusterManagerFunctional() { prometheus.MustRegister(metricV6HostSubnetCount) prometheus.MustRegister(metricV4AllocatedHostSubnetCount) prometheus.MustRegister(metricV6AllocatedHostSubnetCount) + if config.OVNKubernetesFeature.EnableEgressIP { + prometheus.MustRegister(metricEgressIPNodeUnreacheableCount) + prometheus.MustRegister(metricEgressIPRebalanceCount) + prometheus.MustRegister(metricEgressIPCount) + } } func UnregisterClusterManagerFunctional() { @@ -95,6 +124,11 @@ func UnregisterClusterManagerFunctional() { prometheus.Unregister(metricV6HostSubnetCount) prometheus.Unregister(metricV4AllocatedHostSubnetCount) prometheus.Unregister(metricV6AllocatedHostSubnetCount) + if config.OVNKubernetesFeature.EnableEgressIP { + prometheus.Unregister(metricEgressIPNodeUnreacheableCount) + prometheus.Unregister(metricEgressIPRebalanceCount) + prometheus.Unregister(metricEgressIPCount) + } } // RecordSubnetUsage records the number of subnets allocated for nodes @@ -109,3 +143,19 @@ func RecordSubnetCount(v4SubnetCount, v6SubnetCount float64) { metricV4HostSubnetCount.Set(v4SubnetCount) metricV6HostSubnetCount.Set(v6SubnetCount) } + +// RecordEgressIPReachableNode records how many times EgressIP detected an unuseable node. +func RecordEgressIPUnreachableNode() { + metricEgressIPNodeUnreacheableCount.Inc() +} + +// RecordEgressIPRebalance records how many EgressIPs had to move to a different egress node. +func RecordEgressIPRebalance(count int) { + metricEgressIPRebalanceCount.Add(float64(count)) +} + +// RecordEgressIPCount records the total number of Egress IPs. +// This total may include multiple Egress IPs per EgressIP CR. +func RecordEgressIPCount(count float64) { + metricEgressIPCount.Set(count) +} diff --git a/go-controller/pkg/metrics/master.go b/go-controller/pkg/metrics/master.go index 3c1e3f231b..1e562b8bb1 100644 --- a/go-controller/pkg/metrics/master.go +++ b/go-controller/pkg/metrics/master.go @@ -164,13 +164,6 @@ var MetricMasterLeader = prometheus.NewGauge(prometheus.GaugeOpts{ Help: "Identifies whether the instance of ovnkube-master is a leader(1) or not(0).", }) -var metricEgressIPCount = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: MetricOvnkubeNamespace, - Subsystem: MetricOvnkubeSubsystemMaster, - Name: "num_egress_ips", - Help: "The number of defined egress IP addresses", -}) - var metricEgressIPAssignLatency = prometheus.NewHistogram(prometheus.HistogramOpts{ Namespace: MetricOvnkubeNamespace, Subsystem: MetricOvnkubeSubsystemMaster, @@ -187,20 +180,6 @@ var metricEgressIPUnassignLatency = prometheus.NewHistogram(prometheus.Histogram Buckets: prometheus.ExponentialBuckets(.001, 2, 15), }) -var metricEgressIPNodeUnreacheableCount = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: MetricOvnkubeNamespace, - Subsystem: MetricOvnkubeSubsystemMaster, - Name: "egress_ips_node_unreachable_total", - Help: "The total number of times assigned egress IP(s) were unreachable"}, -) - -var metricEgressIPRebalanceCount = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: MetricOvnkubeNamespace, - Subsystem: MetricOvnkubeSubsystemMaster, - Name: "egress_ips_rebalance_total", - Help: "The total number of times assigned egress IP(s) needed to be moved to a different node"}, -) - var metricNetpolEventLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: MetricOvnkubeNamespace, Subsystem: MetricOvnkubeSubsystemMaster, @@ -412,7 +391,6 @@ func RegisterMasterPerformance(nbClient 
libovsdbclient.Client) { // LE is won. func RegisterMasterFunctional() { // No need to unregister because process exits when leadership is lost. - prometheus.MustRegister(metricEgressIPCount) if config.Metrics.EnableScaleMetrics { klog.Infof("Scale metrics are enabled") prometheus.MustRegister(metricEgressIPAssignLatency) @@ -424,8 +402,6 @@ func RegisterMasterFunctional() { prometheus.MustRegister(metricPodSelectorAddrSetNamespaceEventLatency) prometheus.MustRegister(metricPodEventLatency) } - prometheus.MustRegister(metricEgressIPNodeUnreacheableCount) - prometheus.MustRegister(metricEgressIPRebalanceCount) prometheus.MustRegister(metricEgressFirewallRuleCount) prometheus.MustRegister(metricEgressFirewallCount) prometheus.MustRegister(metricEgressRoutingViaHost) @@ -505,12 +481,6 @@ func RecordPodCreated(pod *kapi.Pod, netInfo util.NetInfo) { } } -// RecordEgressIPCount records the total number of Egress IPs. -// This total may include multiple Egress IPs per EgressIP CR. -func RecordEgressIPCount(count float64) { - metricEgressIPCount.Set(count) -} - // RecordEgressIPAssign records how long it took EgressIP to configure OVN. func RecordEgressIPAssign(duration time.Duration) { metricEgressIPAssignLatency.Observe(duration.Seconds()) @@ -521,16 +491,6 @@ func RecordEgressIPUnassign(duration time.Duration) { metricEgressIPUnassignLatency.Observe(duration.Seconds()) } -// RecordEgressIPReachableNode records how many times EgressIP detected an unuseable node. -func RecordEgressIPUnreachableNode() { - metricEgressIPNodeUnreacheableCount.Inc() -} - -// RecordEgressIPRebalance records how many EgressIPs had to move to a different egress node. -func RecordEgressIPRebalance(count int) { - metricEgressIPRebalanceCount.Add(float64(count)) -} - func RecordNetpolEvent(eventName string, duration time.Duration) { metricNetpolEventLatency.WithLabelValues(eventName).Observe(duration.Seconds()) } From 934cd540e36c8b43782fcd5968354e1d0cd6009e Mon Sep 17 00:00:00 2001 From: Surya Seetharaman Date: Wed, 24 May 2023 17:49:46 +0200 Subject: [PATCH 86/90] Add doc changes for IC metrics move Ensuring the work done here: https://github.com/martinkennelly/ovn-kubernetes-1/commit/c47ed896d6eef1e78844cc258deafd20502c348b sees light. Co-Authored-by: Martin Kennelly Signed-off-by: Surya Seetharaman --- docs/metrics.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/docs/metrics.md b/docs/metrics.md index 14d293fb49..797b7d35eb 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -15,8 +15,17 @@ Measurement accuracy can be impacted by other parallel processing that might be |ovnkube_master_network_programming_ovn_duration_seconds| Histogram | The duration for OVN to apply network configuration for a kind (e.g. pod, service, networkpolicy). ## Change log -This list is to help notify if there are additions, changes or removals to metrics. +This list is to help notify if there are additions, changes or removals to metrics. Latest changes are at the top of this list. +- Effect of OVN IC architecture: + - Move the following metrics from subsystem "master" to subsystem "clustermanager". Therefore, the following metrics are renamed.
+ - `ovnkube_master_num_v4_host_subnets` -> `ovnkube_clustermanager_num_v4_host_subnets` + - `ovnkube_master_num_v6_host_subnets` -> `ovnkube_clustermanager_num_v6_host_subnets` + - `ovnkube_master_allocated_v4_host_subnets` -> `ovnkube_clustermanager_allocated_v4_host_subnets` + - `ovnkube_master_allocated_v6_host_subnets` -> `ovnkube_clustermanager_allocated_v6_host_subnets` + - `ovnkube_master_num_egress_ips` -> `ovnkube_clustermanager_num_egress_ips` + - `ovnkube_master_egress_ips_node_unreachable_total` -> `ovnkube_clustermanager_egress_ips_node_unreachable_total` + - `ovnkube_master_egress_ips_rebalance_total` -> `ovnkube_clustermanager_egress_ips_rebalance_total` - Update description of ovnkube_master_pod_creation_latency_seconds - Add libovsdb metrics - ovnkube_master_libovsdb_disconnects_total and ovnkube_master_libovsdb_monitors. - Add ovn_controller_southbound_database_connected metric (https://github.com/ovn-org/ovn-kubernetes/pull/3117). From ef27912e234ea5aae610786d7873ab7f2c705d4e Mon Sep 17 00:00:00 2001 From: Surya Seetharaman Date: Fri, 2 Jun 2023 08:15:44 +0200 Subject: [PATCH 87/90] User Facing change: Rename ncm to ovnkube-controller Recently we added the network-controller-manager flag mode to support deployments where the master runs separately from the cluster-manager. From https://github.com/ovn-org/ovn-kubernetes/pull/3366 we have renamed the container to be more generic: `ovnkube-controller`. Since we are still early and only merged this flag a few weeks ago, let's make sure we stay consistent moving forward and rename this flag to ovnkube-controller before bringing it downstream into CNO. NOTE: All existing internal code can still call this NetworkControllerManager. On a user-facing level, I'd like to keep this simple and say that anything programming ovnkube and the OVN DB is the ovnkube-controller, similar to the ovn-controller container. If we don't make this change now, we are going to end up with confusion and a mismatch between the ncm flag and the ovnkube-controller container. Signed-off-by: Surya Seetharaman --- FAQ.md | 2 +- dist/images/ovnkube.sh | 32 ++++++------- dist/templates/ovnkube-cm-ncm.yaml.j2 | 8 ++-- dist/templates/ovnkube-monitor.yaml.j2 | 2 +- docs/ha.md | 14 +++--- go-controller/README.md | 6 +-- go-controller/cmd/ovnkube/ovnkube.go | 48 +++++++++---------- go-controller/pkg/config/config.go | 8 ++-- .../network_controller_manager.go | 2 +- 9 files changed, 61 insertions(+), 61 deletions(-) diff --git a/FAQ.md b/FAQ.md index 83ce1152a1..07ab3cd15f 100644 --- a/FAQ.md +++ b/FAQ.md @@ -21,7 +21,7 @@ need to be made aware of it by following the instructions below. OVN CNI requires several TCP and UDP ports to be opened on each of the node that is part of the K8s cluster. - 1. The node on which ovnkube-master or ovnkube-network-controller-manager runs, open following ports: + 1.
The node on which ovnkube-master or ovnkube-controller runs, open following ports: ```text TCP: port 9409 (prometheus port to export ovnkube-master metrics) diff --git a/dist/images/ovnkube.sh b/dist/images/ovnkube.sh index a3c6203aa0..7503f81659 100755 --- a/dist/images/ovnkube.sh +++ b/dist/images/ovnkube.sh @@ -455,7 +455,7 @@ process_healthy() { check_health() { ctl_file="" case ${1} in - "ovnkube" | "ovnkube-master" | "ovn-dbchecker" | "ovnkube-cluster-manager" | "ovnkube-network-controller-manager") + "ovnkube" | "ovnkube-master" | "ovn-dbchecker" | "ovnkube-cluster-manager" | "ovnkube-controller") # just check for presence of pid ;; "ovnnb_db" | "ovnsb_db") @@ -895,7 +895,7 @@ run-ovn-northd() { exit 8 } -# v3 - run ovnkube --master (both cluster-manager and network-controller-manager) +# v3 - run ovnkube --master (both cluster-manager and ovnkube-controller) ovn-master() { trap 'kill $(jobs -p); exit 0' TERM check_ovn_daemonset_version "3" @@ -1086,13 +1086,13 @@ ovn-master() { exit 9 } -# v3 - run ovnkube --network-controller-manager -ovn-network-controller-manager() { +# v3 - run ovnkube --ovnkube-controller +ovnkube-controller() { trap 'kill $(jobs -p); exit 0' TERM check_ovn_daemonset_version "3" - rm -f ${OVN_RUNDIR}/ovnkube-network-controller-manager.pid + rm -f ${OVN_RUNDIR}/ovnkube-controller.pid - echo "=============== ovn-network-controller-manager (wait for ready_to_start_node) ==========" + echo "=============== ovnkube-controller (wait for ready_to_start_node) ==========" wait_for_event ready_to_start_node echo "ovn_nbdb ${ovn_nbdb} ovn_sbdb ${ovn_sbdb}" @@ -1100,7 +1100,7 @@ ovn-network-controller-manager() { wait_for_event process_ready ovn-northd # wait for ovs-servers to start since ovn-master sets some fields in OVS DB - echo "=============== ovn-network-controller-manager - (wait for ovs)" + echo "=============== ovnkube-controller - (wait for ovs)" wait_for_event ovs_ready hybrid_overlay_flags= @@ -1223,9 +1223,9 @@ ovn-network-controller-manager() { fi echo "ovnkube_config_duration_enable_flag: ${ovnkube_config_duration_enable_flag}" - echo "=============== ovn-network-controller-manager ========== MASTER ONLY" + echo "=============== ovnkube-controller ========== MASTER ONLY" /usr/bin/ovnkube \ - --init-network-controller-manager ${K8S_NODE} \ + --init-ovnkube-controller ${K8S_NODE} \ --cluster-subnets ${net_cidr} --k8s-service-cidr=${svc_cidr} \ --nb-address=${ovn_nbdb} --sb-address=${ovn_sbdb} \ --gateway-mode=${ovn_gateway_mode} \ @@ -1238,8 +1238,8 @@ ovn-network-controller-manager() { ${empty_lb_events_flag} \ ${ovn_v4_join_subnet_opt} \ ${ovn_v6_join_subnet_opt} \ - --pidfile ${OVN_RUNDIR}/ovnkube-network-controller-manager.pid \ - --logfile /var/log/ovn-kubernetes/ovnkube-network-controller-manager.log \ + --pidfile ${OVN_RUNDIR}/ovnkube-controller.pid \ + --logfile /var/log/ovn-kubernetes/ovnkube-controller.log \ ${ovn_master_ssl_opts} \ ${ovnkube_metrics_tls_opts} \ ${multicast_enabled_flag} \ @@ -1254,10 +1254,10 @@ ovn-network-controller-manager() { --metrics-bind-address ${ovnkube_master_metrics_bind_address} \ --host-network-namespace ${ovn_host_network_namespace} & - echo "=============== ovn-network-controller-manager ========== running" - wait_for_event attempts=3 process_ready ovnkube-network-controller-manager + echo "=============== ovnkube-controller ========== running" + wait_for_event attempts=3 process_ready ovnkube-controller - process_healthy ovnkube-network-controller-manager + process_healthy ovnkube-controller exit 9 } @@ -1730,8 
+1730,8 @@ case ${cmd} in "ovn-master") # pod ovnkube-master container ovnkube-master ovn-master ;; -"ovn-network-controller-manager") # pod ovnkube-master container ovnkube-network-controller-manager - ovn-network-controller-manager +"ovnkube-controller") # pod ovnkube-master container ovnkube-controller + ovnkube-controller ;; "ovn-cluster-manager") # pod ovnkube-master container ovnkube-cluster-manager ovn-cluster-manager diff --git a/dist/templates/ovnkube-cm-ncm.yaml.j2 b/dist/templates/ovnkube-cm-ncm.yaml.j2 index bd974fc59b..946e283800 100644 --- a/dist/templates/ovnkube-cm-ncm.yaml.j2 +++ b/dist/templates/ovnkube-cm-ncm.yaml.j2 @@ -1,6 +1,6 @@ # ovnkube-master # daemonset version 3 -# starts master daemons (ovnkube-cluster-manager, ovnkube-network-controller-manager and ovn-northd) each in a separate container +# starts master daemons (ovnkube-cluster-manager, ovnkube-controller and ovn-northd) each in a separate container # it is run on the master(s) kind: Deployment apiVersion: apps/v1 @@ -122,12 +122,12 @@ spec: periodSeconds: 60 # end of container - # network controller manager - - name: ovnkube-network-controller-manager + # ovnkube-controller + - name: ovnkube-controller image: "{{ ovn_image | default('docker.io/ovnkube/ovn-daemonset:latest') }}" imagePullPolicy: "{{ ovn_image_pull_policy | default('IfNotPresent') }}" - command: ["/root/ovnkube.sh", "ovn-network-controller-manager"] + command: ["/root/ovnkube.sh", "ovnkube-controller"] securityContext: runAsUser: 0 diff --git a/dist/templates/ovnkube-monitor.yaml.j2 b/dist/templates/ovnkube-monitor.yaml.j2 index f3411c9d2e..44e23ab1ea 100644 --- a/dist/templates/ovnkube-monitor.yaml.j2 +++ b/dist/templates/ovnkube-monitor.yaml.j2 @@ -1,5 +1,5 @@ # define ServiceMontior and Service resources for ovnkube-cluster-manager, -# ovnkube-master (or ovnkube-network-controller-manager), ovnkube-node and ovnkube-db (required for prometheus monitoring) +# ovnkube-master (or ovnkube-controller), ovnkube-node and ovnkube-db (required for prometheus monitoring) apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor diff --git a/docs/ha.md b/docs/ha.md index 644a88f239..ee52e6c7b0 100644 --- a/docs/ha.md +++ b/docs/ha.md @@ -76,11 +76,11 @@ sudo ovs-appctl -t /var/run/openvswitch/ovnsb_db.ctl \ ## ovnkube master HA setup -ovnkube master has 2 main components - cluster-manager and network-controller-manager. +ovnkube master has 2 main components - cluster-manager and ovnkube-controller. Starting ovnkube with '-init-master', runs both the components. It is also possible to run these components individually by starting 2 ovnkube's one with '-init-cluster-manager' -and the other with '-init-network-controller-manager'. +and the other with '-init-ovnkube-controller'. On the master nodes, we can either * start ovnkube with '-init-master' @@ -105,25 +105,25 @@ nohup sudo ovnkube -k8s-kubeconfig kubeconfig.yaml \ -nb-address="${ovn_nb}" \ -sb-address="${ovn_sb}" 2>&1 & - * start 'ovnkube -init-cluster-manager' and 'ovnkube -init-network-controller-manager' + * start 'ovnkube -init-cluster-manager' and 'ovnkube -init-ovnkube-controller' This should be a deployment with these 2 as containers Eg. ovnkube master supports running in 3 modes. -init-master mode, init-cluster-manager mode or init-network-controller-manager +init-master mode, init-cluster-manager mode or init-ovnkube-controller mode. 
If ovnkube is run with "-init-master" mode, then there is no need to run the other modes because master mode enables both cluster-manager -and network-controller-manager. If the user desires to run cluster-manager -and network-controller-manager separately, then it is possible to do +and ovnkube-controller. If the user desires to run cluster-manager +and ovnkube-controller separately, then it is possible to do so by running nohup sudo ovnkube -k8s-kubeconfig kubeconfig.yaml \ -loglevel=4 \ -k8s-apiserver="http://$K8S_APISERVER_IP:8080" \ -logfile="/var/log/openvswitch/ovnkube.log" \ - -init-network-controller-manager="$NODENAME" -cluster-subnets="$CLUSTER_IP_SUBNET" \ + -init-ovnkube-controller="$NODENAME" -cluster-subnets="$CLUSTER_IP_SUBNET" \ -init-node="$NODENAME" \ -k8s-service-cidr="$SERVICE_IP_SUBNET" \ -k8s-token="$TOKEN" \ diff --git a/go-controller/README.md b/go-controller/README.md index 8b81b137a3..63bd1c7d26 100644 --- a/go-controller/README.md +++ b/go-controller/README.md @@ -42,8 +42,8 @@ Usage: initialize master which enables both cluster manager (allocates node subnets) and network controller manager (which watches pods/nodes/services/policies and creates OVN db resources), requires the hostname as argument -init-cluster-manager string initialize cluster manager that watches nodes (allocates subnet for each node from the cluster-subnets), requires the hostname as argument and doesn't connect to the OVN dbs. - -init-network-controller-manager string - initialize network-controller-manager (which watches pods/nodes/services/policies and create OVN db resources), requires the hostname as argument. + -init-ovnkube-controller string + initialize ovnkube-controller (which watches pods/nodes/services/policies and create OVN db resources), requires the hostname as argument. -init-node string initialize node, requires the name that node is registered with in kubernetes cluster -cleanup-node string @@ -186,7 +186,7 @@ The aforementioned ovnkube cluster manager will establish the watcher loops for - nodes: as new nodes are born and init-node is called, the subnet IPAM is allocated for the respective nodes ``` -ovnkube --init-network-controller-manager \ +ovnkube --init-ovnkube-controller \ --k8s-cacert \ --k8s-token \ --k8s-apiserver \ diff --git a/go-controller/cmd/ovnkube/ovnkube.go b/go-controller/cmd/ovnkube/ovnkube.go index d030602e3f..d40416ff40 100644 --- a/go-controller/cmd/ovnkube/ovnkube.go +++ b/go-controller/cmd/ovnkube/ovnkube.go @@ -179,10 +179,10 @@ func setupPIDFile(pidfile string) error { // ovnkubeRunMode object stores the run mode of the ovnkube type ovnkubeRunMode struct { - networkControllerManager bool // network controller manager (--init-network-controller-manager or --init-master) is enabled - clusterManager bool // cluster manager (--init-cluster-manager or --init-master) is enabled - node bool // node (--init-node) is enabled - cleanupNode bool // cleanup (--cleanup-node) is enabled + ovnkubeController bool // ovnkube controller (--init-ovnkube-controller or --init-master) is enabled + clusterManager bool // cluster manager (--init-cluster-manager or --init-master) is enabled + node bool // node (--init-node) is enabled + cleanupNode bool // cleanup (--cleanup-node) is enabled // Along with the run mode, an identity is provided that uniquely identifies // this instance vs other instances that might be running in the cluster. 
@@ -194,22 +194,22 @@ type ovnkubeRunMode struct { // determineOvnkubeRunMode determines the run modes of ovnkube // based on the init flags set. It is possible to run ovnkube in // multiple modes. Allowed multiple modes are: -// - master (controller manager + cluster manager) + node -// - network controller manager + cluster manager -// - network controller manager + node +// - master (ovnkube controller + cluster manager) + node +// - ovnkube controller + cluster manager +// - ovnkube controller + node func determineOvnkubeRunMode(ctx *cli.Context) (*ovnkubeRunMode, error) { mode := &ovnkubeRunMode{} master := ctx.String("init-master") cm := ctx.String("init-cluster-manager") - nm := ctx.String("init-network-controller-manager") + ovnkController := ctx.String("init-ovnkube-controller") node := ctx.String("init-node") cleanup := ctx.String("cleanup-node") if master != "" { - // If init-master is set, then both network controller manager and cluster manager + // If init-master is set, then both ovnkube controller and cluster manager // are enabled - mode.networkControllerManager = true + mode.ovnkubeController = true mode.clusterManager = true } @@ -217,8 +217,8 @@ func determineOvnkubeRunMode(ctx *cli.Context) (*ovnkubeRunMode, error) { mode.clusterManager = true } - if nm != "" { - mode.networkControllerManager = true + if ovnkController != "" { + mode.ovnkubeController = true } if node != "" { @@ -229,19 +229,19 @@ func determineOvnkubeRunMode(ctx *cli.Context) (*ovnkubeRunMode, error) { mode.cleanupNode = true } - if mode.cleanupNode && (mode.clusterManager || mode.networkControllerManager || mode.node) { + if mode.cleanupNode && (mode.clusterManager || mode.ovnkubeController || mode.node) { return nil, fmt.Errorf("cannot run cleanup-node mode along with any other mode") } - if !mode.clusterManager && !mode.networkControllerManager && !mode.node && !mode.cleanupNode { + if !mode.clusterManager && !mode.ovnkubeController && !mode.node && !mode.cleanupNode { return nil, fmt.Errorf("need to specify a mode for ovnkube") } - if !mode.networkControllerManager && mode.clusterManager && mode.node { + if !mode.ovnkubeController && mode.clusterManager && mode.node { return nil, fmt.Errorf("cannot run in both cluster manager and node mode") } - identities := sets.NewString(master, cm, nm, node, cleanup) + identities := sets.NewString(master, cm, ovnkController, node, cleanup) identities.Delete("") if identities.Len() != 1 { return nil, fmt.Errorf("provided no identity or different identities for different modes") @@ -298,7 +298,7 @@ func startOvnKube(ctx *cli.Context, cancel context.CancelFunc) error { } // no need for leader election in node mode - if !runMode.clusterManager && !runMode.networkControllerManager { + if !runMode.clusterManager && !runMode.ovnkubeController { return runOvnKube(ctx.Context, runMode, ovnClientset, eventRecorder) } @@ -310,10 +310,10 @@ func startOvnKube(ctx *cli.Context, cancel context.CancelFunc) error { var haConfig *config.HAConfig var name string switch { - case runMode.networkControllerManager && runMode.clusterManager: + case runMode.ovnkubeController && runMode.clusterManager: metrics.RegisterClusterManagerBase() fallthrough - case runMode.networkControllerManager: + case runMode.ovnkubeController: metrics.RegisterMasterBase() haConfig = &config.MasterHA name = "ovn-kubernetes-master-" + config.Default.Zone @@ -420,7 +420,7 @@ func runOvnKube(ctx context.Context, runMode *ovnkubeRunMode, ovnClientset *util var masterWatchFactory *factory.WatchFactory var err 
error - if runMode.networkControllerManager { + if runMode.ovnkubeController { // create factory and start the controllers asked for masterWatchFactory, err = factory.NewMasterWatchFactory(ovnClientset.GetMasterClientset()) if err != nil { @@ -431,7 +431,7 @@ func runOvnKube(ctx context.Context, runMode *ovnkubeRunMode, ovnClientset *util if runMode.clusterManager { var clusterManagerWatchFactory *factory.WatchFactory - if runMode.networkControllerManager { + if runMode.ovnkubeController { clusterManagerWatchFactory = masterWatchFactory } else { clusterManagerWatchFactory, err = factory.NewClusterManagerWatchFactory(ovnClientset.GetClusterManagerClientset()) @@ -456,7 +456,7 @@ func runOvnKube(ctx context.Context, runMode *ovnkubeRunMode, ovnClientset *util metrics.MetricClusterManagerReadyDuration.Set(time.Since(startTime).Seconds()) } - if runMode.networkControllerManager { + if runMode.ovnkubeController { var libovsdbOvnNBClient, libovsdbOvnSBClient libovsdbclient.Client if libovsdbOvnNBClient, err = libovsdb.NewNBClient(stopChan); err != nil { @@ -536,7 +536,7 @@ type ovnkubeMasterMetrics struct { } func (m ovnkubeMasterMetrics) On(string) { - if m.runMode.networkControllerManager { + if m.runMode.ovnkubeController { metrics.MetricMasterLeader.Set(1) } if m.runMode.clusterManager { @@ -545,7 +545,7 @@ func (m ovnkubeMasterMetrics) On(string) { } func (m ovnkubeMasterMetrics) Off(string) { - if m.runMode.networkControllerManager { + if m.runMode.ovnkubeController { metrics.MetricMasterLeader.Set(0) } if m.runMode.clusterManager { diff --git a/go-controller/pkg/config/config.go b/go-controller/pkg/config/config.go index ed81bb719a..9b92003a10 100644 --- a/go-controller/pkg/config/config.go +++ b/go-controller/pkg/config/config.go @@ -652,15 +652,15 @@ var CommonFlags = []cli.Flag{ // Mode flags &cli.StringFlag{ Name: "init-master", - Usage: "initialize master (both cluster-manager and network-controller-manager), requires the hostname as argument", + Usage: "initialize master (both cluster-manager and ovnkube-controller), requires the hostname as argument", }, &cli.StringFlag{ Name: "init-cluster-manager", - Usage: "initialize cluster manager (but not network-controller-manager), requires the hostname as argument", + Usage: "initialize cluster manager (but not ovnkube-controller), requires the hostname as argument", }, &cli.StringFlag{ - Name: "init-network-controller-manager", - Usage: "initialize network-controller-manager (but not cluster-manager), requires the hostname as argument", + Name: "init-ovnkube-controller", + Usage: "initialize ovnkube-controller (but not cluster-manager), requires the hostname as argument", }, &cli.StringFlag{ Name: "init-node", diff --git a/go-controller/pkg/network-controller-manager/network_controller_manager.go b/go-controller/pkg/network-controller-manager/network_controller_manager.go index e4ee0b217d..a55be39517 100644 --- a/go-controller/pkg/network-controller-manager/network_controller_manager.go +++ b/go-controller/pkg/network-controller-manager/network_controller_manager.go @@ -177,7 +177,7 @@ func (cm *networkControllerManager) CleanupDeletedNetworks(allControllers []nad. 
return nil } -// NewNetworkControllerManager creates a new OVN controller manager to manage all the controller for all networks +// NewNetworkControllerManager creates a new OVNKube controller manager to manage all the controllers for all networks func NewNetworkControllerManager(ovnClient *util.OVNClientset, identity string, wf *factory.WatchFactory, libovsdbOvnNBClient libovsdbclient.Client, libovsdbOvnSBClient libovsdbclient.Client, recorder record.EventRecorder, wg *sync.WaitGroup) (*networkControllerManager, error) { From 593577e89be84a51571befcd94fa5a799f7d778d Mon Sep 17 00:00:00 2001 From: Surya Seetharaman Date: Fri, 2 Jun 2023 09:09:09 +0200 Subject: [PATCH 88/90] Add NCMWatchFactory We have 3 modes that we can run in: a) cluster-manager b) ovnkube-controller (internally ncm) c) node. When IC=true and multiple zones are used, cluster-manager and ovnkube-controller are run separately. CloudPrivateIPConfigType is used only from cluster-manager in that case and there is no need to initialize it from ovnkube-controller. However, when these modes are run together, we use the master watch factory and this creates confusion. Here is what this PR proposes: a) CMWatch Factory when CM is run independently b) NCMWatch Factory when NCM is run independently c) NodeWatch Factory when node is run independently d) MasterWatch Factory when CM+NCM are run together e) MasterWatch Factory when NCM+Node are run together f) MasterWatch Factory when NCM+Node+CM are run together Signed-off-by: Surya Seetharaman --- go-controller/cmd/ovnkube/ovnkube.go | 20 ++++++++++++--- go-controller/pkg/factory/factory.go | 22 +++++++++++++++-- go-controller/pkg/util/kube.go | 37 ++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+), 6 deletions(-) diff --git a/go-controller/cmd/ovnkube/ovnkube.go b/go-controller/cmd/ovnkube/ovnkube.go index d40416ff40..0d2aae9e3d 100644 --- a/go-controller/cmd/ovnkube/ovnkube.go +++ b/go-controller/cmd/ovnkube/ovnkube.go @@ -422,7 +422,7 @@ func runOvnKube(ctx context.Context, runMode *ovnkubeRunMode, ovnClientset *util if runMode.ovnkubeController { // create factory and start the controllers asked for - masterWatchFactory, err = factory.NewMasterWatchFactory(ovnClientset.GetMasterClientset()) + masterWatchFactory, err = factory.NewNCMWatchFactory(ovnClientset.GetNetworkControllerManagerClientset()) if err != nil { return err } @@ -432,6 +432,10 @@ func runOvnKube(ctx context.Context, runMode *ovnkubeRunMode, ovnClientset *util if runMode.clusterManager { var clusterManagerWatchFactory *factory.WatchFactory if runMode.ovnkubeController { + masterWatchFactory, err = factory.NewMasterWatchFactory(ovnClientset.GetMasterClientset()) + if err != nil { + return err + } clusterManagerWatchFactory = masterWatchFactory } else { clusterManagerWatchFactory, err = factory.NewClusterManagerWatchFactory(ovnClientset.GetClusterManagerClientset()) @@ -485,15 +489,23 @@ func runOvnKube(ctx context.Context, runMode *ovnkubeRunMode, ovnClientset *util if runMode.node { var nodeWatchFactory factory.NodeWatchFactory - if masterWatchFactory == nil { + if runMode.ovnkubeController && runMode.clusterManager { + // masterWatchFactory would be initialized as NewMasterWatchFactory already, let's use that + nodeWatchFactory = masterWatchFactory + } else if runMode.ovnkubeController { + // masterWatchFactory would be initialized as NewNCMWatchFactory, let's change that + masterWatchFactory, err = factory.NewMasterWatchFactory(ovnClientset.GetMasterClientset()) + if err != nil { + return err + } +
nodeWatchFactory = masterWatchFactory + } else { var err error nodeWatchFactory, err = factory.NewNodeWatchFactory(ovnClientset.GetNodeClientset(), runMode.identity) if err != nil { return err } defer nodeWatchFactory.Shutdown() - } else { - nodeWatchFactory = masterWatchFactory } if config.Kubernetes.Token == "" { diff --git a/go-controller/pkg/factory/factory.go b/go-controller/pkg/factory/factory.go index 8575a25b70..7032070436 100644 --- a/go-controller/pkg/factory/factory.go +++ b/go-controller/pkg/factory/factory.go @@ -153,9 +153,27 @@ var ( ServiceForFakeNodePortWatcherType reflect.Type = reflect.TypeOf(&serviceForFakeNodePortWatcher{}) // only for unit tests ) -// NewMasterWatchFactory initializes a new watch factory for the network controller manager -// or network controller manager+cluster manager or network controller manager+node processes. +// NewMasterWatchFactory initializes a new watch factory for: +// a) ovnkube-controller + cluster manager or +// b) ovnkube-controller + node +// processes. func NewMasterWatchFactory(ovnClientset *util.OVNMasterClientset) (*WatchFactory, error) { + wf, err := NewNCMWatchFactory(ovnClientset.GetNetworkControllerManagerClientset()) + if err != nil { + return nil, err + } + wf.cpipcFactory = ocpcloudnetworkinformerfactory.NewSharedInformerFactory(ovnClientset.CloudNetworkClient, resyncInterval) + if util.PlatformTypeIsEgressIPCloudProvider() { + wf.informers[CloudPrivateIPConfigType], err = newInformer(CloudPrivateIPConfigType, wf.cpipcFactory.Cloud().V1().CloudPrivateIPConfigs().Informer()) + if err != nil { + return nil, err + } + } + return wf, nil +} + +// NewNCMWatchFactory initializes a new watch factory for the ovnkube controller process +func NewNCMWatchFactory(ovnClientset *util.OVNNetworkControllerManagerClientset) (*WatchFactory, error) { // resync time is 12 hours, none of the resources being watched in ovn-kubernetes have // any race condition where a resync may be required e.g. 
cni executable on node watching for // events on pods and assuming that an 'ADD' event will contain the annotations put in by diff --git a/go-controller/pkg/util/kube.go b/go-controller/pkg/util/kube.go index 753617db3f..d2eb9cb1e6 100644 --- a/go-controller/pkg/util/kube.go +++ b/go-controller/pkg/util/kube.go @@ -54,6 +54,18 @@ type OVNClientset struct { // OVNMasterClientset type OVNMasterClientset struct { + KubeClient kubernetes.Interface + EgressIPClient egressipclientset.Interface + CloudNetworkClient ocpcloudnetworkclientset.Interface + EgressFirewallClient egressfirewallclientset.Interface + EgressQoSClient egressqosclientset.Interface + MultiNetworkPolicyClient multinetworkpolicyclientset.Interface + EgressServiceClient egressserviceclientset.Interface + AdminPolicyRouteClient adminpolicybasedrouteclientset.Interface +} + +// OVNNetworkControllerManagerClientset +type OVNNetworkControllerManagerClientset struct { KubeClient kubernetes.Interface EgressIPClient egressipclientset.Interface EgressFirewallClient egressfirewallclientset.Interface @@ -78,6 +90,31 @@ type OVNClusterManagerClientset struct { func (cs *OVNClientset) GetMasterClientset() *OVNMasterClientset { return &OVNMasterClientset{ + KubeClient: cs.KubeClient, + EgressIPClient: cs.EgressIPClient, + CloudNetworkClient: cs.CloudNetworkClient, + EgressFirewallClient: cs.EgressFirewallClient, + EgressQoSClient: cs.EgressQoSClient, + MultiNetworkPolicyClient: cs.MultiNetworkPolicyClient, + EgressServiceClient: cs.EgressServiceClient, + AdminPolicyRouteClient: cs.AdminPolicyRouteClient, + } +} + +func (cs *OVNMasterClientset) GetNetworkControllerManagerClientset() *OVNNetworkControllerManagerClientset { + return &OVNNetworkControllerManagerClientset{ + KubeClient: cs.KubeClient, + EgressIPClient: cs.EgressIPClient, + EgressFirewallClient: cs.EgressFirewallClient, + EgressQoSClient: cs.EgressQoSClient, + MultiNetworkPolicyClient: cs.MultiNetworkPolicyClient, + EgressServiceClient: cs.EgressServiceClient, + AdminPolicyRouteClient: cs.AdminPolicyRouteClient, + } +} + +func (cs *OVNClientset) GetNetworkControllerManagerClientset() *OVNNetworkControllerManagerClientset { + return &OVNNetworkControllerManagerClientset{ KubeClient: cs.KubeClient, EgressIPClient: cs.EgressIPClient, EgressFirewallClient: cs.EgressFirewallClient, From 30f28071c8a0a09cfeeb4c1401ee8b08ba91f509 Mon Sep 17 00:00:00 2001 From: Patryk Diak Date: Thu, 1 Jun 2023 17:14:15 +0200 Subject: [PATCH 89/90] Stop using a waitgroup in route manager Route manager doesn't make any use of the waitgroup it was created with. Additionally it was only calling `wg.Done()` without calling `wg.Add(1)` first. 
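With the waitgroup removed from the route manager itself, the goroutine lifecycle is owned by the caller. The pattern used throughout the updated tests looks roughly like this self-contained sketch; run here is a stand-in for routeManager.run, and the short sleep only keeps the toy example alive briefly:

package main

import (
	"fmt"
	"sync"
	"time"
)

// run stands in for routeManager.run: it blocks until stopCh is closed.
func run(stopCh <-chan struct{}) {
	<-stopCh
	fmt.Println("route manager stopped")
}

func main() {
	wg := &sync.WaitGroup{}
	stopCh := make(chan struct{})

	// The caller owns the lifecycle: Add(1) before starting the goroutine,
	// Done() inside it, and Wait() after closing the stop channel.
	wg.Add(1)
	go func() {
		defer wg.Done()
		run(stopCh)
	}()

	time.Sleep(10 * time.Millisecond)
	close(stopCh)
	wg.Wait()
}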
Signed-off-by: Patryk Diak
(cherry picked from commit a1b8aefc4e3d1b4ec0e9491e3a9e3e7ae8cabdf8)
---
 .../node/default_node_network_controller.go  |  9 +++--
 .../pkg/node/gateway_init_linux_test.go      | 38 +++++++++++++------
 .../pkg/node/management-port_linux_test.go   | 17 ++++++---
 go-controller/pkg/node/route_manager.go      |  6 +--
 go-controller/pkg/node/route_manager_test.go |  9 +++--
 5 files changed, 51 insertions(+), 28 deletions(-)

diff --git a/go-controller/pkg/node/default_node_network_controller.go b/go-controller/pkg/node/default_node_network_controller.go
index 29da5cb2bb..c0c2c6f9c5 100644
--- a/go-controller/pkg/node/default_node_network_controller.go
+++ b/go-controller/pkg/node/default_node_network_controller.go
@@ -117,7 +117,7 @@ func newDefaultNodeNetworkController(cnnci *CommonNodeNetworkControllerInfo, sto
             stopChan: stopChan,
             wg:       wg,
         },
-        routeManager: newRouteManager(wg, true, 2*time.Minute),
+        routeManager: newRouteManager(true, 2*time.Minute),
     }
 }

@@ -631,8 +631,11 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error {
     if err := level.Set("5"); err != nil {
         klog.Errorf("Setting klog \"loglevel\" to 5 failed, err: %v", err)
     }
-    go nc.routeManager.run(ctx.Done())
-
+    nc.wg.Add(1)
+    go func() {
+        defer nc.wg.Done()
+        nc.routeManager.run(nc.stopChan)
+    }()
     if node, err = nc.Kube.GetNode(nc.name); err != nil {
         return fmt.Errorf("error retrieving node %s: %v", nc.name, err)
     }
diff --git a/go-controller/pkg/node/gateway_init_linux_test.go b/go-controller/pkg/node/gateway_init_linux_test.go
index b73f83c901..bad2f00fb0 100644
--- a/go-controller/pkg/node/gateway_init_linux_test.go
+++ b/go-controller/pkg/node/gateway_init_linux_test.go
@@ -253,9 +253,10 @@ func shareGatewayInterfaceTest(app *cli.App, testNS ns.NetNS,
     Expect(err).NotTo(HaveOccurred())
     err = nodeAnnotator.Run()
     Expect(err).NotTo(HaveOccurred())
-    rm := newRouteManager(wg, true, 10*time.Second)
+    rm := newRouteManager(true, 10*time.Second)
     wg.Add(1)
     go testNS.Do(func(netNS ns.NetNS) error {
+        defer wg.Done()
         defer GinkgoRecover()
         rm.run(stop)
         return nil
@@ -624,9 +625,10 @@ func shareGatewayInterfaceDPUTest(app *cli.App, testNS ns.NetNS,
     ifAddrs := ovntest.MustParseIPNets(hostCIDR)
     ifAddrs[0].IP = ovntest.MustParseIP(dpuIP)

-    rm := newRouteManager(wg, true, 10*time.Second)
+    rm := newRouteManager(true, 10*time.Second)
     wg.Add(1)
     go testNS.Do(func(netNS ns.NetNS) error {
+        defer wg.Done()
         defer GinkgoRecover()
         rm.run(stop)
         return nil
@@ -737,6 +739,7 @@ func shareGatewayInterfaceDPUHostTest(app *cli.App, testNS ns.NetNS, uplinkName,
     // must run route manager manually which is usually started with nc.Start()
     wg.Add(1)
     go testNS.Do(func(netNS ns.NetNS) error {
+        defer wg.Done()
         defer GinkgoRecover()
         nc.routeManager.run(stop)
         return nil
@@ -1054,9 +1057,10 @@ OFPT_GET_CONFIG_REPLY (xid=0x4): frags=normal miss_send_len=0`,
     Expect(err).NotTo(HaveOccurred())
     err = nodeAnnotator.Run()
     Expect(err).NotTo(HaveOccurred())
-    rm := newRouteManager(wg, true, 10*time.Second)
+    rm := newRouteManager(true, 10*time.Second)
     wg.Add(1)
     go testNS.Do(func(netNS ns.NetNS) error {
+        defer wg.Done()
         defer GinkgoRecover()
         rm.run(stop)
         return nil
@@ -1558,10 +1562,13 @@ var _ = Describe("Gateway unit tests", func() {
         netlinkMock.On("RouteListFiltered", mock.Anything, mock.Anything, mock.Anything).Return(nil, nil)
         netlinkMock.On("RouteAdd", expectedRoute).Return(nil)
         wg := &sync.WaitGroup{}
-        rm := newRouteManager(wg, true, 10*time.Second)
+        rm := newRouteManager(true, 10*time.Second)
         stopCh := make(chan struct{})
         wg.Add(1)
-        go rm.run(stopCh)
+        go func() {
+            defer wg.Done()
+            rm.run(stopCh)
+        }()
         defer func() {
             close(stopCh)
             wg.Wait()
@@ -1602,10 +1609,13 @@ var _ = Describe("Gateway unit tests", func() {
         netlinkMock.On("RouteListFiltered", mock.Anything, mock.Anything, mock.Anything).Return([]netlink.Route{*previousRoute}, nil)
         netlinkMock.On("RouteReplace", expectedRoute).Return(nil)
         wg := &sync.WaitGroup{}
-        rm := newRouteManager(wg, true, 10*time.Second)
+        rm := newRouteManager(true, 10*time.Second)
         stopCh := make(chan struct{})
-        go rm.run(stopCh)
         wg.Add(1)
+        go func() {
+            defer wg.Done()
+            rm.run(stopCh)
+        }()
         defer func() {
             close(stopCh)
             wg.Wait()
@@ -1619,10 +1629,13 @@
         netlinkMock.On("LinkByName", mock.Anything).Return(nil, fmt.Errorf("failed to find interface"))
         gwIPs := []net.IP{net.ParseIP("10.0.0.11")}
         wg := &sync.WaitGroup{}
-        rm := newRouteManager(wg, true, 10*time.Second)
+        rm := newRouteManager(true, 10*time.Second)
         stopCh := make(chan struct{})
-        go rm.run(stopCh)
         wg.Add(1)
+        go func() {
+            defer wg.Done()
+            rm.run(stopCh)
+        }()
         defer func() {
             close(stopCh)
             wg.Wait()
@@ -1640,10 +1653,13 @@
         netlinkMock.On("LinkByName", mock.Anything).Return(nil, nil)
         netlinkMock.On("LinkSetUp", mock.Anything).Return(nil)
         wg := &sync.WaitGroup{}
-        rm := newRouteManager(wg, true, 10*time.Second)
+        rm := newRouteManager(true, 10*time.Second)
         stopCh := make(chan struct{})
-        go rm.run(stopCh)
         wg.Add(1)
+        go func() {
+            defer wg.Done()
+            rm.run(stopCh)
+        }()
         defer func() {
             close(stopCh)
             wg.Wait()
diff --git a/go-controller/pkg/node/management-port_linux_test.go b/go-controller/pkg/node/management-port_linux_test.go
index fbd729feff..7147c3ee20 100644
--- a/go-controller/pkg/node/management-port_linux_test.go
+++ b/go-controller/pkg/node/management-port_linux_test.go
@@ -267,16 +267,17 @@ func testManagementPort(ctx *cli.Context, fexec *ovntest.FakeExec, testNS ns.Net
     nodeAnnotator := kube.NewNodeAnnotator(&kube.KubeOVN{Kube: kube.Kube{KClient: fakeClient}, EIPClient: egressipv1fake.NewSimpleClientset(), EgressFirewallClient: &egressfirewallfake.Clientset{}, EgressServiceClient: &egressservicefake.Clientset{}}, existingNode.Name)
     waiter := newStartupWaiter()
     wg := &sync.WaitGroup{}
-    rm := newRouteManager(wg, true, 10*time.Second)
+    rm := newRouteManager(true, 10*time.Second)
     stopCh := make(chan struct{})
     defer func() {
         close(stopCh)
         wg.Wait()
     }()

+    wg.Add(1)
     go testNS.Do(func(netNS ns.NetNS) error {
+        defer wg.Done()
         defer GinkgoRecover()
-        wg.Add(1)
         rm.run(stopCh)
         return nil
     })
@@ -360,10 +361,13 @@ func testManagementPortDPU(ctx *cli.Context, fexec *ovntest.FakeExec, testNS ns.
     nodeAnnotator := kube.NewNodeAnnotator(&kube.KubeOVN{Kube: kube.Kube{KClient: fakeClient}, EIPClient: egressipv1fake.NewSimpleClientset(), EgressFirewallClient: &egressfirewallfake.Clientset{}, EgressServiceClient: &egressservicefake.Clientset{}}, existingNode.Name)
     waiter := newStartupWaiter()
     wg := &sync.WaitGroup{}
-    rm := newRouteManager(wg, true, 10*time.Second)
+    rm := newRouteManager(true, 10*time.Second)
     stopCh := make(chan struct{})
-    go rm.run(stopCh)
     wg.Add(1)
+    go func() {
+        defer wg.Done()
+        rm.run(stopCh)
+    }()
     defer func() {
         close(stopCh)
         wg.Wait()
@@ -446,11 +450,12 @@ func testManagementPortDPUHost(ctx *cli.Context, fexec *ovntest.FakeExec, testNS
     _, err = config.InitConfig(ctx, fexec, nil)
     Expect(err).NotTo(HaveOccurred())
     wg := &sync.WaitGroup{}
-    rm := newRouteManager(wg, true, 10*time.Second)
+    rm := newRouteManager(true, 10*time.Second)
     stopCh := make(chan struct{})
+    wg.Add(1)
     go testNS.Do(func(netNS ns.NetNS) error {
+        defer wg.Done()
         defer GinkgoRecover()
-        wg.Add(1)
         rm.run(stopCh)
         return nil
     })
diff --git a/go-controller/pkg/node/route_manager.go b/go-controller/pkg/node/route_manager.go
index 346c273a8b..f7eddcb940 100644
--- a/go-controller/pkg/node/route_manager.go
+++ b/go-controller/pkg/node/route_manager.go
@@ -3,7 +3,6 @@ package node
 import (
     "fmt"
     "net"
-    "sync"
     "time"

     "github.com/vishvananda/netlink"
@@ -22,21 +21,19 @@ type routeManager struct {
     store      map[string]routesPerLink // key is link name
     addRouteCh chan routesPerLink
     delRouteCh chan routesPerLink
-    wg         *sync.WaitGroup
 }

 // newRouteManager manages routes which include adding and deletion of routes. It also manages restoration of managed routes.
 // Begin managing routes by calling run() to start the manager.
 // Routes should be added via add(route) and deletion via del(route) functions only.
 // All other functions are used internally.
-func newRouteManager(wg *sync.WaitGroup, logRouteChanges bool, syncPeriod time.Duration) *routeManager {
+func newRouteManager(logRouteChanges bool, syncPeriod time.Duration) *routeManager {
     return &routeManager{
         logRouteChanges: logRouteChanges,
         syncPeriod:      syncPeriod,
         store:           make(map[string]routesPerLink),
         addRouteCh:      make(chan routesPerLink, 5),
         delRouteCh:      make(chan routesPerLink, 5),
-        wg:              wg,
     }
 }

@@ -47,7 +44,6 @@ func (rm *routeManager) run(stopCh <-chan struct{}) {
     subscribed, routeEventCh = subscribeNetlinkRouteEvents(stopCh)
     ticker := time.NewTicker(rm.syncPeriod)
     defer ticker.Stop()
-    defer rm.wg.Done()

     for {
         select {
diff --git a/go-controller/pkg/node/route_manager_test.go b/go-controller/pkg/node/route_manager_test.go
index 7c40f9fb5d..c7b9351b77 100644
--- a/go-controller/pkg/node/route_manager_test.go
+++ b/go-controller/pkg/node/route_manager_test.go
@@ -34,7 +34,9 @@ var _ = ginkgo.Describe("Route Manager", func() {
     loIP := net.IPv4(127, 1, 1, 1)
     loIPDiff := net.IPv4(127, 1, 1, 2)
     loGWIP := net.IPv4(127, 1, 1, 254)
-    if os.Getuid() != 0 {
+
+    if os.Getenv("NOROOT") == "TRUE" {
+        defer ginkgo.GinkgoRecover()
         ginkgo.Skip("Test requires root privileges")
     }

@@ -46,10 +48,9 @@ var _ = ginkgo.Describe("Route Manager", func() {

     wg = &sync.WaitGroup{}
     stopCh = make(chan struct{})
-    wg.Add(1)
     syncPeriod := 10 * time.Millisecond
     logAllActivity := true
-    rm = newRouteManager(wg, logAllActivity, syncPeriod)
+    rm = newRouteManager(logAllActivity, syncPeriod)
     err = testNS.Do(func(netNS ns.NetNS) error {
         defer ginkgo.GinkgoRecover()
         loLink, err = netlink.LinkByName(loLinkName)
@@ -73,7 +74,9 @@ var _ = ginkgo.Describe("Route Manager", func() {
         return nil
     })

+    wg.Add(1)
     go testNS.Do(func(netNS ns.NetNS) error {
+        defer wg.Done()
         defer ginkgo.GinkgoRecover()
         rm.run(stopCh)
         return nil

From b7f88264cecc7dc5724adb0254c223c95cbc6890 Mon Sep 17 00:00:00 2001
From: Surya Seetharaman
Date: Wed, 7 Jun 2023 17:22:55 +0200
Subject: [PATCH 90/90] Fix podIPs not found on IC

Signed-off-by: Surya Seetharaman
---
 .../pkg/ovn/base_network_controller_namespace.go | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/go-controller/pkg/ovn/base_network_controller_namespace.go b/go-controller/pkg/ovn/base_network_controller_namespace.go
index e8ef6c8f7a..ffcdc27ecb 100644
--- a/go-controller/pkg/ovn/base_network_controller_namespace.go
+++ b/go-controller/pkg/ovn/base_network_controller_namespace.go
@@ -11,6 +11,7 @@ import (
     "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb"
     addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set"
     "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util"
+    "github.com/pkg/errors"

     kapi "k8s.io/api/core/v1"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -386,6 +387,14 @@ func (bsnc *BaseNetworkController) removeRemoteZonePodFromNamespaceAddressSet(po
     podDesc := fmt.Sprintf("pod %s/%s/%s", bsnc.GetNetworkName(), pod.Namespace, pod.Name)
     podIfAddrs, err := util.GetPodCIDRsWithFullMask(pod, bsnc.NetInfo)
     if err != nil {
+        // maybe the pod is not scheduled yet or addLSP has not happened yet, so it doesn't have IPs.
+        // let us ignore deletion failures for podIPs not found because
+        // there is nothing more we can do here.
+        if errors.Is(err, util.ErrNoPodIPFound) {
+            klog.Errorf("Unable to remove remote zone pod's %s/%s IP address from the "+
+                "namespace address-set, err: %v", pod.Namespace, pod.Name, err)
+            return nil
+        }
         return fmt.Errorf("failed to get pod ips for the pod %s/%s : %w", pod.Namespace, pod.Name, err)
     }
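
Note: the podIPs fix above hinges on sentinel-error matching: util.GetPodCIDRsWithFullMask returns an error that wraps util.ErrNoPodIPFound when the pod has no IP annotation yet, and errors.Is recognizes the sentinel through the %w wrapping, so the handler can log and treat the missing IPs as a no-op instead of returning an error that would be retried forever. Below is a minimal, self-contained sketch of that pattern, not ovn-kubernetes code: podIPs and removeFromAddressSet are hypothetical stand-ins, and it uses the standard library errors package rather than the github.com/pkg/errors import seen in the patch.

package main

import (
	"errors"
	"fmt"
)

// ErrNoPodIPFound plays the role of the package-level sentinel error.
var ErrNoPodIPFound = errors.New("no pod IPs found")

// podIPs is a stand-in lookup that wraps the sentinel with %w when the pod
// has not been annotated with an IP yet.
func podIPs(annotated bool) ([]string, error) {
	if !annotated {
		return nil, fmt.Errorf("pod has no IP annotation yet: %w", ErrNoPodIPFound)
	}
	return []string{"10.244.0.5/32"}, nil
}

// removeFromAddressSet mirrors the control flow of the fix: a missing IP is
// treated as "nothing to remove", every other failure is wrapped and returned.
func removeFromAddressSet(annotated bool) error {
	ips, err := podIPs(annotated)
	if err != nil {
		if errors.Is(err, ErrNoPodIPFound) {
			// nothing was ever added for this pod, so there is nothing to remove
			return nil
		}
		return fmt.Errorf("failed to get pod ips: %w", err)
	}
	fmt.Println("removing", ips)
	return nil
}

func main() {
	fmt.Println(removeFromAddressSet(false)) // <nil>: treated as a no-op
	fmt.Println(removeFromAddressSet(true))  // removes the IPs, returns <nil>
}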
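
Note on the route_manager change earlier in this series: newRouteManager no longer takes a *sync.WaitGroup and run() no longer calls wg.Done() itself; instead every caller does wg.Add(1) before launching the goroutine and defers wg.Done() inside it, which removes the earlier pattern of calling Add after the goroutine may already be running, or Done without a matching Add. Below is a minimal, self-contained sketch of the resulting start/stop lifecycle; runLoop is a hypothetical stand-in for routeManager.run, not the project's code.

package main

import (
	"sync"
	"time"
)

// runLoop stands in for routeManager.run: it does periodic work until the
// stop channel is closed. The name and body are illustrative only.
func runLoop(stopCh <-chan struct{}, syncPeriod time.Duration) {
	ticker := time.NewTicker(syncPeriod)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			// periodic route sync would happen here
		case <-stopCh:
			return
		}
	}
}

func main() {
	wg := &sync.WaitGroup{}
	stopCh := make(chan struct{})

	wg.Add(1) // Add on the caller's goroutine, before the worker can finish
	go func() {
		defer wg.Done() // paired with the Add above, owned by the caller
		runLoop(stopCh, 10*time.Millisecond)
	}()

	close(stopCh) // signal shutdown
	wg.Wait()     // safe: Wait cannot race ahead of the Add(1) above
}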