From 465fa780d4188e30e64a38a317a3831e609145e7 Mon Sep 17 00:00:00 2001 From: Yun Zhou Date: Thu, 23 Feb 2023 10:08:51 -0800 Subject: [PATCH 01/12] allow optional subnet for localnet secondary network Signed-off-by: Yun Zhou --- go-controller/pkg/ovn/base_network_controller.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go-controller/pkg/ovn/base_network_controller.go b/go-controller/pkg/ovn/base_network_controller.go index da72ece3ae..bdad88a098 100644 --- a/go-controller/pkg/ovn/base_network_controller.go +++ b/go-controller/pkg/ovn/base_network_controller.go @@ -710,5 +710,5 @@ func (bnc *BaseNetworkController) recordNodeErrorEvent(node *kapi.Node, nodeErr } func (bnc *BaseNetworkController) doesNetworkRequireIPAM() bool { - return !(bnc.TopologyType() == types.Layer2Topology && len(bnc.Subnets()) == 0) + return !((bnc.TopologyType() == types.Layer2Topology || bnc.TopologyType() == types.LocalnetTopology) && len(bnc.Subnets()) == 0) } From 26e98053234d3c60ef7a628676bf052373820358 Mon Sep 17 00:00:00 2001 From: Yun Zhou Date: Wed, 22 Feb 2023 14:18:43 -0800 Subject: [PATCH 02/12] e2e, multi-homing: add tests for localnet network Signed-off-by: Yun Zhou --- test/e2e/multihoming.go | 190 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 181 insertions(+), 9 deletions(-) diff --git a/test/e2e/multihoming.go b/test/e2e/multihoming.go index 682f09055b..9af8ce159f 100644 --- a/test/e2e/multihoming.go +++ b/test/e2e/multihoming.go @@ -25,13 +25,16 @@ import ( var _ = Describe("Multi Homing", func() { const ( - podName = "tinypod" - secondaryNetworkCIDR = "10.128.0.0/16" - secondaryNetworkName = "tenant-blue" - secondaryFlatL2IgnoreCIDR = "10.128.0.0/29" - secondaryFlatL2NetworkCIDR = "10.128.0.0/24" - netPrefixLengthPerNode = 24 - secondaryIPv6CIDR = "10:100:200::0/64" + podName = "tinypod" + secondaryNetworkCIDR = "10.128.0.0/16" + secondaryNetworkName = "tenant-blue" + secondaryFlatL2IgnoreCIDR = "10.128.0.0/29" + secondaryFlatL2NetworkCIDR = "10.128.0.0/24" + secondaryLocalnetIgnoreCIDR = "60.128.0.0/29" + secondaryLocalnetNetworkCIDR = "60.128.0.0/24" + netPrefixLengthPerNode = 24 + localnetVLANID = 10 + secondaryIPv6CIDR = "10:100:200::0/64" ) f := wrappedTestFramework("multi-homing") @@ -81,7 +84,7 @@ var _ = Describe("Multi Homing", func() { if netConfig.excludeCIDRs != nil { podIP, err := podIPForAttachment(cs, pod.GetNamespace(), pod.GetName(), secondaryNetworkName, 0) Expect(err).NotTo(HaveOccurred()) - Expect(inRange(secondaryNetworkCIDR, podIP)).To(Succeed()) + Expect(inRange(popCIDRsPerNodePrefix(netConfig.cidr, netPrefixLengthPerNode), podIP)).To(Succeed()) for _, excludedRange := range netConfig.excludeCIDRs { Expect(inRange(excludedRange, podIP)).To( MatchError(fmt.Errorf("ip [%s] is NOT in range %s", podIP, excludedRange))) @@ -160,6 +163,71 @@ var _ = Describe("Multi Homing", func() { name: podName, }, ), + table.Entry( + "when attaching to an localnet - switched - network", + networkAttachmentConfig{ + cidr: secondaryLocalnetNetworkCIDR, + name: secondaryNetworkName, + topology: "localnet", + vlanID: localnetVLANID, + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: podName, + }, + ), + table.Entry( + "when attaching to an Localnet - switched - network featuring `excludeCIDR`s", + networkAttachmentConfig{ + cidr: secondaryLocalnetNetworkCIDR, + name: secondaryNetworkName, + topology: "localnet", + excludeCIDRs: []string{secondaryLocalnetIgnoreCIDR}, + vlanID: localnetVLANID, + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: podName, + }, + ), + table.Entry( + "when attaching to an localnet - switched - network without IPAM", + networkAttachmentConfig{ + name: secondaryNetworkName, + topology: "localnet", + vlanID: localnetVLANID, + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: podName, + }, + ), + table.Entry( + "when attaching to an localnet - switched - network with an IPv6 subnet", + networkAttachmentConfig{ + cidr: secondaryIPv6CIDR, + name: secondaryNetworkName, + topology: "localnet", + vlanID: localnetVLANID, + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: podName, + }, + ), + table.Entry( + "when attaching to an L2 - switched - network with a dual stack configuration", + networkAttachmentConfig{ + cidr: strings.Join([]string{secondaryLocalnetNetworkCIDR, secondaryIPv6CIDR}, ","), + name: secondaryNetworkName, + topology: "localnet", + vlanID: localnetVLANID, + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: podName, + }, + ), ) }) @@ -395,6 +463,107 @@ var _ = Describe("Multi Homing", func() { nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, }, ), + table.Entry( + "can communicate over an Localnet secondary network when the pods are scheduled on the same node", + networkAttachmentConfig{ + name: secondaryNetworkName, + topology: "localnet", + cidr: secondaryLocalnetNetworkCIDR, + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: clientPodName, + nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: podName, + containerCmd: httpServerContainerCmd(port), + nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, + }, + ), + table.Entry( + "can communicate over an Localnet secondary network without IPAM when the pods are scheduled on the same node", + networkAttachmentConfig{ + name: secondaryNetworkName, + topology: "localnet", + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: clientPodName, + nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, + isPrivileged: true, + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: podName, + containerCmd: httpServerContainerCmd(port), + nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, + isPrivileged: true, + }, + ), + table.Entry( + "can communicate over an localnet secondary network without IPAM when the pods are scheduled on the same node, with static IPs configured via network selection elements", + networkAttachmentConfig{ + name: secondaryNetworkName, + topology: "localnet", + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{ + Name: secondaryNetworkName, + IPRequest: []string{clientIP}, + }}, + name: clientPodName, + nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{ + Name: secondaryNetworkName, + IPRequest: []string{staticServerIP}, + }}, + name: podName, + containerCmd: httpServerContainerCmd(port), + nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, + }, + ), + table.Entry( + "can communicate over an localnet secondary network with an IPv6 subnet when pods are scheduled on the same node", + networkAttachmentConfig{ + name: secondaryNetworkName, + topology: "localnet", + cidr: secondaryIPv6CIDR, + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: clientPodName, + nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: podName, + containerCmd: httpServerContainerCmd(port), + nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, + }, + ), + table.Entry( + "can communicate over an localnet secondary network with a dual stack configuration when pods are scheduled on the same node", + networkAttachmentConfig{ + name: secondaryNetworkName, + topology: "localnet", + cidr: strings.Join([]string{secondaryLocalnetNetworkCIDR, secondaryIPv6CIDR}, ","), + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: clientPodName, + nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: podName, + containerCmd: httpServerContainerCmd(port), + nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, + }, + ), ) }) @@ -482,6 +651,7 @@ type networkAttachmentConfig struct { name string topology string networkName string + vlanID int } func (nac networkAttachmentConfig) attachmentName() string { @@ -507,7 +677,8 @@ func generateNAD(config networkAttachmentConfig) *nadapi.NetworkAttachmentDefini "subnets": %q, "excludeSubnets": %q, "mtu": 1300, - "netAttachDefName": %q + "netAttachDefName": %q, + "vlanID": %d } `, config.attachmentName(), @@ -515,6 +686,7 @@ func generateNAD(config networkAttachmentConfig) *nadapi.NetworkAttachmentDefini config.cidr, strings.Join(config.excludeCIDRs, ","), namespacedName(config.namespace, config.name), + config.vlanID, ) return generateNetAttachDef(config.namespace, config.name, nadSpec) } From 4a3f1dd6f8802fe996287464972230c9df0a8e5a Mon Sep 17 00:00:00 2001 From: Nadia Pinaeva Date: Tue, 28 Feb 2023 15:04:35 +0100 Subject: [PATCH 03/12] Add MetricMasterSyncDuration metric to track how much it takes for ovn-k master to start watching every resource. Add scale metrics for network policies, rename existing enable-eip-scale-metrics flag to more general enable-scale-metrics, and use it for network policy metric too. Signed-off-by: Nadia Pinaeva --- go-controller/pkg/config/config.go | 10 +- go-controller/pkg/config/config_test.go | 6 +- go-controller/pkg/metrics/master.go | 105 +++++++++++++++++- .../pkg/ovn/default_network_controller.go | 50 ++++++--- go-controller/pkg/ovn/egressip.go | 4 +- go-controller/pkg/ovn/ovn.go | 19 ++++ go-controller/pkg/ovn/policy.go | 75 ++++++++++++- 7 files changed, 243 insertions(+), 26 deletions(-) diff --git a/go-controller/pkg/config/config.go b/go-controller/pkg/config/config.go index b936eea583..aa9d217bb8 100644 --- a/go-controller/pkg/config/config.go +++ b/go-controller/pkg/config/config.go @@ -333,8 +333,8 @@ type MetricsConfig struct { NodeServerCert string `gcfg:"node-server-cert"` // EnableConfigDuration holds the boolean flag to enable OVN-Kubernetes master to monitor OVN-Kubernetes master // configuration duration and optionally, its application to all nodes - EnableConfigDuration bool `gcfg:"enable-config-duration"` - EnableEIPScaleMetrics bool `gcfg:"enable-eip-scale-metrics"` + EnableConfigDuration bool `gcfg:"enable-config-duration"` + EnableScaleMetrics bool `gcfg:"enable-scale-metrics"` } // OVNKubernetesFeatureConfig holds OVN-Kubernetes feature enhancement config file parameters and command-line overrides @@ -1041,9 +1041,9 @@ var MetricsFlags = []cli.Flag{ Destination: &cliConfig.Metrics.EnableConfigDuration, }, &cli.BoolFlag{ - Name: "metrics-enable-eip-scale", - Usage: "Enables metrics related to Egress IP scaling", - Destination: &cliConfig.Metrics.EnableEIPScaleMetrics, + Name: "metrics-enable-scale", + Usage: "Enables metrics related to scaling", + Destination: &cliConfig.Metrics.EnableScaleMetrics, }, } diff --git a/go-controller/pkg/config/config_test.go b/go-controller/pkg/config/config_test.go index 57bfb8a148..747d84ac16 100644 --- a/go-controller/pkg/config/config_test.go +++ b/go-controller/pkg/config/config_test.go @@ -158,7 +158,7 @@ enable-pprof=true node-server-privkey=/path/to/node-metrics-private.key node-server-cert=/path/to/node-metrics.crt enable-config-duration=true -enable-eip-scale-metrics=true +enable-scale-metrics=true [logging] loglevel=5 @@ -585,7 +585,7 @@ var _ = Describe("Config Operations", func() { gomega.Expect(Metrics.NodeServerPrivKey).To(gomega.Equal("/path/to/node-metrics-private.key")) gomega.Expect(Metrics.NodeServerCert).To(gomega.Equal("/path/to/node-metrics.crt")) gomega.Expect(Metrics.EnableConfigDuration).To(gomega.Equal(true)) - gomega.Expect(Metrics.EnableEIPScaleMetrics).To(gomega.Equal(true)) + gomega.Expect(Metrics.EnableScaleMetrics).To(gomega.Equal(true)) gomega.Expect(OvnNorth.Scheme).To(gomega.Equal(OvnDBSchemeSSL)) gomega.Expect(OvnNorth.PrivKey).To(gomega.Equal("/path/to/nb-client-private.key")) @@ -673,7 +673,7 @@ var _ = Describe("Config Operations", func() { gomega.Expect(Metrics.NodeServerPrivKey).To(gomega.Equal("/tls/nodeprivkey")) gomega.Expect(Metrics.NodeServerCert).To(gomega.Equal("/tls/nodecert")) gomega.Expect(Metrics.EnableConfigDuration).To(gomega.Equal(true)) - gomega.Expect(Metrics.EnableEIPScaleMetrics).To(gomega.Equal(true)) + gomega.Expect(Metrics.EnableScaleMetrics).To(gomega.Equal(true)) gomega.Expect(OvnNorth.Scheme).To(gomega.Equal(OvnDBSchemeSSL)) gomega.Expect(OvnNorth.PrivKey).To(gomega.Equal("/client/privkey")) diff --git a/go-controller/pkg/metrics/master.go b/go-controller/pkg/metrics/master.go index 1013cbb46e..3fdf506e80 100644 --- a/go-controller/pkg/metrics/master.go +++ b/go-controller/pkg/metrics/master.go @@ -145,6 +145,17 @@ var MetricMasterReadyDuration = prometheus.NewGauge(prometheus.GaugeOpts{ Help: "The duration for the master to get to ready state", }) +// MetricMasterSyncDuration is the time taken to complete initial Watch for different resource. +// Resource name is in the label. +var MetricMasterSyncDuration = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: MetricOvnkubeNamespace, + Subsystem: MetricOvnkubeSubsystemMaster, + Name: "sync_duration_seconds", + Help: "The duration to sync and setup all handlers for a given resource"}, + []string{ + "resource_name", + }) + // MetricMasterLeader identifies whether this instance of ovnkube-master is a leader or not var MetricMasterLeader = prometheus.NewGauge(prometheus.GaugeOpts{ Namespace: MetricOvnkubeNamespace, @@ -190,6 +201,66 @@ var metricEgressIPRebalanceCount = prometheus.NewCounter(prometheus.CounterOpts{ Help: "The total number of times assigned egress IP(s) needed to be moved to a different node"}, ) +var metricNetpolEventLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: MetricOvnkubeNamespace, + Subsystem: MetricOvnkubeSubsystemMaster, + Name: "network_policy_event_latency_seconds", + Help: "The latency of full network policy event handling (create, delete)", + Buckets: prometheus.ExponentialBuckets(.004, 2, 15)}, + []string{ + "event", + }) + +var metricNetpolLocalPodEventLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: MetricOvnkubeNamespace, + Subsystem: MetricOvnkubeSubsystemMaster, + Name: "network_policy_local_pod_event_latency_seconds", + Help: "The latency of local pod events handling (add, delete)", + Buckets: prometheus.ExponentialBuckets(.002, 2, 15)}, + []string{ + "event", + }) + +var metricNetpolPeerPodEventLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: MetricOvnkubeNamespace, + Subsystem: MetricOvnkubeSubsystemMaster, + Name: "network_policy_peer_pod_event_latency_seconds", + Help: "The latency of peer pod events handling (add, delete)", + Buckets: prometheus.ExponentialBuckets(.002, 2, 15)}, + []string{ + "event", + }) + +var metricNetpolPeerNamespaceEventLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: MetricOvnkubeNamespace, + Subsystem: MetricOvnkubeSubsystemMaster, + Name: "network_policy_peer_namespace_event_latency_seconds", + Help: "The latency of peer namespace events handling (add, delete)", + Buckets: prometheus.ExponentialBuckets(.002, 2, 15)}, + []string{ + "event", + }) + +var metricNetpolPeerNamespaceAndPodEventLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: MetricOvnkubeNamespace, + Subsystem: MetricOvnkubeSubsystemMaster, + Name: "network_policy_peer_namespace_and_pod_event_latency_seconds", + Help: "The latency of peer namespace events handling (add, delete)", + Buckets: prometheus.ExponentialBuckets(.002, 2, 15)}, + []string{ + "event", + }) + +var metricPodEventLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: MetricOvnkubeNamespace, + Subsystem: MetricOvnkubeSubsystemMaster, + Name: "pod_event_latency_seconds", + Help: "The latency of pod events handling (add, update, delete)", + Buckets: prometheus.ExponentialBuckets(.002, 2, 15)}, + []string{ + "event", + }) + var metricEgressFirewallRuleCount = prometheus.NewGauge(prometheus.GaugeOpts{ Namespace: MetricOvnkubeNamespace, Subsystem: MetricOvnkubeSubsystemMaster, @@ -288,6 +359,7 @@ const ( func RegisterMasterBase() { prometheus.MustRegister(MetricMasterLeader) prometheus.MustRegister(MetricMasterReadyDuration) + prometheus.MustRegister(MetricMasterSyncDuration) prometheus.MustRegister(prometheus.NewGaugeFunc( prometheus.GaugeOpts{ Namespace: MetricOvnkubeNamespace, @@ -341,9 +413,16 @@ func RegisterMasterPerformance(nbClient libovsdbclient.Client) { func RegisterMasterFunctional() { // No need to unregister because process exits when leadership is lost. prometheus.MustRegister(metricEgressIPCount) - if config.Metrics.EnableEIPScaleMetrics { + if config.Metrics.EnableScaleMetrics { + klog.Infof("Scale metrics are enabled") prometheus.MustRegister(metricEgressIPAssignLatency) prometheus.MustRegister(metricEgressIPUnassignLatency) + prometheus.MustRegister(metricNetpolEventLatency) + prometheus.MustRegister(metricNetpolLocalPodEventLatency) + prometheus.MustRegister(metricNetpolPeerPodEventLatency) + prometheus.MustRegister(metricNetpolPeerNamespaceEventLatency) + prometheus.MustRegister(metricNetpolPeerNamespaceAndPodEventLatency) + prometheus.MustRegister(metricPodEventLatency) } prometheus.MustRegister(metricEgressIPNodeUnreacheableCount) prometheus.MustRegister(metricEgressIPRebalanceCount) @@ -448,6 +527,30 @@ func RecordEgressIPRebalance(count int) { metricEgressIPRebalanceCount.Add(float64(count)) } +func RecordNetpolEvent(eventName string, duration time.Duration) { + metricNetpolEventLatency.WithLabelValues(eventName).Observe(duration.Seconds()) +} + +func RecordNetpolLocalPodEvent(eventName string, duration time.Duration) { + metricNetpolLocalPodEventLatency.WithLabelValues(eventName).Observe(duration.Seconds()) +} + +func RecordNetpolPeerPodEvent(eventName string, duration time.Duration) { + metricNetpolPeerPodEventLatency.WithLabelValues(eventName).Observe(duration.Seconds()) +} + +func RecordNetpolPeerNamespaceEvent(eventName string, duration time.Duration) { + metricNetpolPeerNamespaceEventLatency.WithLabelValues(eventName).Observe(duration.Seconds()) +} + +func RecordNetpolPeerNamespaceAndPodEvent(eventName string, duration time.Duration) { + metricNetpolPeerNamespaceAndPodEventLatency.WithLabelValues(eventName).Observe(duration.Seconds()) +} + +func RecordPodEvent(eventName string, duration time.Duration) { + metricPodEventLatency.WithLabelValues(eventName).Observe(duration.Seconds()) +} + // UpdateEgressFirewallRuleCount records the number of Egress firewall rules. func UpdateEgressFirewallRuleCount(count float64) { metricEgressFirewallRuleCount.Add(count) diff --git a/go-controller/pkg/ovn/default_network_controller.go b/go-controller/pkg/ovn/default_network_controller.go index 9e05d7d6d6..dfd8413ea6 100644 --- a/go-controller/pkg/ovn/default_network_controller.go +++ b/go-controller/pkg/ovn/default_network_controller.go @@ -348,35 +348,39 @@ func (oc *DefaultNetworkController) Run(ctx context.Context) error { // Sync external gateway routes. External gateway may be set in namespaces // or via pods. So execute an individual sync method at startup - oc.cleanExGwECMPRoutes() + WithSyncDurationMetricNoError("external gateway routes", oc.cleanExGwECMPRoutes) // WatchNamespaces() should be started first because it has no other // dependencies, and WatchNodes() depends on it - if err := oc.WatchNamespaces(); err != nil { + if err := WithSyncDurationMetric("namespace", oc.WatchNamespaces); err != nil { return err } // WatchNodes must be started next because it creates the node switch // which most other watches depend on. // https://github.com/ovn-org/ovn-kubernetes/pull/859 - if err := oc.WatchNodes(); err != nil { + if err := WithSyncDurationMetric("node", oc.WatchNodes); err != nil { return err } + startSvc := time.Now() // Start service watch factory and sync services oc.svcFactory.Start(oc.stopChan) // Services should be started after nodes to prevent LB churn - if err := oc.StartServiceController(oc.wg, true); err != nil { + err := oc.StartServiceController(oc.wg, true) + endSvc := time.Since(startSvc) + metrics.MetricMasterSyncDuration.WithLabelValues("service").Set(endSvc.Seconds()) + if err != nil { return err } - if err := oc.WatchPods(); err != nil { + if err := WithSyncDurationMetric("pod", oc.WatchPods); err != nil { return err } // WatchNetworkPolicy depends on WatchPods and WatchNamespaces - if err := oc.WatchNetworkPolicy(); err != nil { + if err := WithSyncDurationMetric("network policy", oc.WatchNetworkPolicy); err != nil { return err } @@ -391,20 +395,20 @@ func (oc *DefaultNetworkController) Run(ctx context.Context) error { // risk performing a bunch of modifications on the EgressIP objects when // we restart and then have these handlers act on stale data when they // sync. - if err := oc.WatchEgressIPNamespaces(); err != nil { + if err := WithSyncDurationMetric("egress ip namespace", oc.WatchEgressIPNamespaces); err != nil { return err } - if err := oc.WatchEgressIPPods(); err != nil { + if err := WithSyncDurationMetric("egress ip pod", oc.WatchEgressIPPods); err != nil { return err } - if err := oc.WatchEgressNodes(); err != nil { + if err := WithSyncDurationMetric("egress node", oc.WatchEgressNodes); err != nil { return err } - if err := oc.WatchEgressIP(); err != nil { + if err := WithSyncDurationMetric("egress ip", oc.WatchEgressIP); err != nil { return err } if util.PlatformTypeIsEgressIPCloudProvider() { - if err := oc.WatchCloudPrivateIPConfig(); err != nil { + if err := WithSyncDurationMetric("could private ip config", oc.WatchCloudPrivateIPConfig); err != nil { return err } } @@ -423,7 +427,7 @@ func (oc *DefaultNetworkController) Run(ctx context.Context) error { return err } oc.egressFirewallDNS.Run(egressFirewallDNSDefaultDuration) - err = oc.WatchEgressFirewall() + err = WithSyncDurationMetric("egress firewall", oc.WatchEgressFirewall) if err != nil { return err } @@ -451,7 +455,9 @@ func (oc *DefaultNetworkController) Run(ctx context.Context) error { oc.egressSvcController.Run(1) }() - klog.Infof("Completing all the Watchers took %v", time.Since(start)) + end := time.Since(start) + klog.Infof("Completing all the Watchers took %v", end) + metrics.MetricMasterSyncDuration.WithLabelValues("all watchers").Set(end.Seconds()) if config.Kubernetes.OVNEmptyLbEvents { klog.Infof("Starting unidling controllers") @@ -481,6 +487,24 @@ func (oc *DefaultNetworkController) Run(ctx context.Context) error { return nil } +func WithSyncDurationMetric(resourceName string, f func() error) error { + start := time.Now() + defer func() { + end := time.Since(start) + metrics.MetricMasterSyncDuration.WithLabelValues(resourceName).Set(end.Seconds()) + }() + return f() +} + +func WithSyncDurationMetricNoError(resourceName string, f func()) { + start := time.Now() + defer func() { + end := time.Since(start) + metrics.MetricMasterSyncDuration.WithLabelValues(resourceName).Set(end.Seconds()) + }() + f() +} + type defaultNetworkControllerEventHandler struct { baseHandler baseNetworkControllerEventHandler watchFactory *factory.WatchFactory diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index 00c61062f6..2d778703f5 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -2273,7 +2273,7 @@ func (oc *DefaultNetworkController) addStandByEgressIPAssignment(podKey string, // (routing pod traffic to the egress node) and NAT objects on the egress node // (SNAT-ing to the egress IP). func (e *egressIPController) addPodEgressIPAssignment(egressIPName string, status egressipv1.EgressIPStatusItem, pod *kapi.Pod, podIPs []*net.IPNet) (err error) { - if config.Metrics.EnableEIPScaleMetrics { + if config.Metrics.EnableScaleMetrics { start := time.Now() defer func() { if err != nil { @@ -2306,7 +2306,7 @@ func (e *egressIPController) addPodEgressIPAssignment(egressIPName string, statu // deletePodEgressIPAssignment deletes the OVN programmed egress IP // configuration mentioned for addPodEgressIPAssignment. func (e *egressIPController) deletePodEgressIPAssignment(egressIPName string, status egressipv1.EgressIPStatusItem, pod *kapi.Pod, podIPs []*net.IPNet) (err error) { - if config.Metrics.EnableEIPScaleMetrics { + if config.Metrics.EnableScaleMetrics { start := time.Now() defer func() { if err != nil { diff --git a/go-controller/pkg/ovn/ovn.go b/go-controller/pkg/ovn/ovn.go index ce6cccdfdf..1c2e2a5fde 100644 --- a/go-controller/pkg/ovn/ovn.go +++ b/go-controller/pkg/ovn/ovn.go @@ -13,6 +13,7 @@ import ( libovsdbclient "github.com/ovn-org/libovsdb/client" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" egresssvc "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/egress_services" @@ -128,6 +129,17 @@ func (oc *DefaultNetworkController) ensurePod(oldPod, pod *kapi.Pod, addPort boo if !util.PodScheduled(pod) { return nil } + if config.Metrics.EnableScaleMetrics { + start := time.Now() + defer func() { + duration := time.Since(start) + eventName := "add" + if !addPort { + eventName = "update" + } + metrics.RecordPodEvent(eventName, duration) + }() + } if oldPod != nil && (exGatewayAnnotationsChanged(oldPod, pod) || networkStatusAnnotationsChanged(oldPod, pod)) { // No matter if a pod is ovn networked, or host networked, we still need to check for exgw @@ -157,6 +169,13 @@ func (oc *DefaultNetworkController) ensurePod(oldPod, pod *kapi.Pod, addPort boo // removePod tried to tear down a pod. It returns nil on success and error on failure; // failure indicates the pod tear down should be retried later. func (oc *DefaultNetworkController) removePod(pod *kapi.Pod, portInfo *lpInfo) error { + if config.Metrics.EnableScaleMetrics { + start := time.Now() + defer func() { + duration := time.Since(start) + metrics.RecordPodEvent("delete", duration) + }() + } if util.PodWantsHostNetwork(pod) { if err := oc.deletePodExternalGW(pod); err != nil { return fmt.Errorf("unable to delete external gateway routes for pod %s: %w", diff --git a/go-controller/pkg/ovn/policy.go b/go-controller/pkg/ovn/policy.go index 863715381b..30e1463949 100644 --- a/go-controller/pkg/ovn/policy.go +++ b/go-controller/pkg/ovn/policy.go @@ -5,11 +5,14 @@ import ( "net" "strings" "sync" + "time" libovsdbclient "github.com/ovn-org/libovsdb/client" ovsdb "github.com/ovn-org/libovsdb/ovsdb" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdbops" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -820,6 +823,13 @@ func (oc *DefaultNetworkController) denyPGDeletePorts(np *networkPolicy, portNam // handleLocalPodSelectorAddFunc adds a new pod to an existing NetworkPolicy, should be retriable. func (oc *DefaultNetworkController) handleLocalPodSelectorAddFunc(np *networkPolicy, objs ...interface{}) error { + if config.Metrics.EnableScaleMetrics { + start := time.Now() + defer func() { + duration := time.Since(start) + metrics.RecordNetpolLocalPodEvent("add", duration) + }() + } np.RLock() defer np.RUnlock() if np.deleted { @@ -864,6 +874,13 @@ func (oc *DefaultNetworkController) handleLocalPodSelectorAddFunc(np *networkPol // handleLocalPodSelectorDelFunc handles delete event for local pod, should be retriable func (oc *DefaultNetworkController) handleLocalPodSelectorDelFunc(np *networkPolicy, objs ...interface{}) error { + if config.Metrics.EnableScaleMetrics { + start := time.Now() + defer func() { + duration := time.Since(start) + metrics.RecordNetpolLocalPodEvent("delete", duration) + }() + } np.RLock() defer np.RUnlock() if np.deleted { @@ -1172,7 +1189,13 @@ func (oc *DefaultNetworkController) createNetworkPolicy(policy *knet.NetworkPoli // if addNetworkPolicy fails, create or delete operation can be retried func (oc *DefaultNetworkController) addNetworkPolicy(policy *knet.NetworkPolicy) error { klog.Infof("Adding network policy %s", getPolicyKey(policy)) - + if config.Metrics.EnableScaleMetrics { + start := time.Now() + defer func() { + duration := time.Since(start) + metrics.RecordNetpolEvent("add", duration) + }() + } // To not hold nsLock for the whole process on network policy creation, we do the following: // 1. save required namespace information to use for netpol create // 2. create network policy without ns Lock @@ -1277,7 +1300,13 @@ func (oc *DefaultNetworkController) buildNetworkPolicyACLs(np *networkPolicy, ac func (oc *DefaultNetworkController) deleteNetworkPolicy(policy *knet.NetworkPolicy) error { npKey := getPolicyKey(policy) klog.Infof("Deleting network policy %s", npKey) - + if config.Metrics.EnableScaleMetrics { + start := time.Now() + defer func() { + duration := time.Since(start) + metrics.RecordNetpolEvent("delete", duration) + }() + } // First lock and update namespace nsInfo, nsUnlock := oc.getNamespaceLocked(policy.Namespace, false) if nsInfo != nil { @@ -1372,6 +1401,13 @@ func (oc *DefaultNetworkController) cleanupNetworkPolicy(np *networkPolicy) erro // selected as a peer by a NetworkPolicy's ingress/egress section to that // ingress/egress address set func (oc *DefaultNetworkController) handlePeerPodSelectorAddUpdate(np *networkPolicy, gp *gressPolicy, objs ...interface{}) error { + if config.Metrics.EnableScaleMetrics { + start := time.Now() + defer func() { + duration := time.Since(start) + metrics.RecordNetpolPeerPodEvent("add", duration) + }() + } np.RLock() defer np.RUnlock() if np.deleted { @@ -1394,6 +1430,13 @@ func (oc *DefaultNetworkController) handlePeerPodSelectorAddUpdate(np *networkPo // matches a NetworkPolicy ingress/egress section's selectors from that // ingress/egress address set func (oc *DefaultNetworkController) handlePeerPodSelectorDelete(np *networkPolicy, gp *gressPolicy, podSelector labels.Selector, obj interface{}) error { + if config.Metrics.EnableScaleMetrics { + start := time.Now() + defer func() { + duration := time.Since(start) + metrics.RecordNetpolPeerPodEvent("delete", duration) + }() + } np.RLock() defer np.RUnlock() if np.deleted { @@ -1479,6 +1522,13 @@ func (oc *DefaultNetworkController) addPeerPodHandler(podSelector *metav1.LabelS func (oc *DefaultNetworkController) handlePeerNamespaceAndPodAdd(np *networkPolicy, gp *gressPolicy, podSelector labels.Selector, obj interface{}) error { + if config.Metrics.EnableScaleMetrics { + start := time.Now() + defer func() { + duration := time.Since(start) + metrics.RecordNetpolPeerNamespaceAndPodEvent("add", duration) + }() + } namespace := obj.(*kapi.Namespace) np.RLock() locked := true @@ -1528,6 +1578,13 @@ func (oc *DefaultNetworkController) handlePeerNamespaceAndPodAdd(np *networkPoli } func (oc *DefaultNetworkController) handlePeerNamespaceAndPodDel(np *networkPolicy, gp *gressPolicy, obj interface{}) error { + if config.Metrics.EnableScaleMetrics { + start := time.Now() + defer func() { + duration := time.Since(start) + metrics.RecordNetpolPeerNamespaceAndPodEvent("delete", duration) + }() + } np.RLock() defer np.RUnlock() if np.deleted { @@ -1593,6 +1650,13 @@ func (oc *DefaultNetworkController) addPeerNamespaceAndPodHandler(namespaceSelec } func (oc *DefaultNetworkController) handlePeerNamespaceSelectorAdd(np *networkPolicy, gp *gressPolicy, objs ...interface{}) error { + if config.Metrics.EnableScaleMetrics { + start := time.Now() + defer func() { + duration := time.Since(start) + metrics.RecordNetpolPeerNamespaceEvent("add", duration) + }() + } np.RLock() if np.deleted { np.RUnlock() @@ -1623,6 +1687,13 @@ func (oc *DefaultNetworkController) handlePeerNamespaceSelectorAdd(np *networkPo } func (oc *DefaultNetworkController) handlePeerNamespaceSelectorDel(np *networkPolicy, gp *gressPolicy, objs ...interface{}) error { + if config.Metrics.EnableScaleMetrics { + start := time.Now() + defer func() { + duration := time.Since(start) + metrics.RecordNetpolPeerNamespaceEvent("delete", duration) + }() + } np.RLock() if np.deleted { np.RUnlock() From fbc9e4ef66312b06326b53675d13d0474ac9cea7 Mon Sep 17 00:00:00 2001 From: Nadia Pinaeva Date: Tue, 28 Feb 2023 15:05:37 +0100 Subject: [PATCH 04/12] Add an option to enable scale metrics in kind Signed-off-by: Nadia Pinaeva --- contrib/kind.sh | 8 +++++++- dist/images/daemonset.sh | 7 +++++++ dist/images/ovnkube.sh | 8 ++++++++ dist/templates/ovnkube-master.yaml.j2 | 2 ++ 4 files changed, 24 insertions(+), 1 deletion(-) diff --git a/contrib/kind.sh b/contrib/kind.sh index 2bac61928b..2d5e04c7c1 100755 --- a/contrib/kind.sh +++ b/contrib/kind.sh @@ -149,6 +149,7 @@ usage() { echo "-ric | --run-in-container Configure the script to be run from a docker container, allowing it to still communicate with the kind controlplane" echo "-ehp | --egress-ip-healthcheck-port TCP port used for gRPC session by egress IP node check. DEFAULT: 9107 (Use "0" for legacy dial to port 9)." echo "-is | --ipsec Enable IPsec encryption (spawns ovn-ipsec pods)" + echo "-sm | --scale-metrics Enable scale metrics" echo "--isolated Deploy with an isolated environment (no default gateway)" echo "--delete Delete current cluster" echo "--deploy Deploy ovn kubernetes without restarting kind" @@ -295,6 +296,8 @@ parse_args() { fi OVN_EGRESSIP_HEALTHCHECK_PORT=$1 ;; + -sm | --scale-metrics ) OVN_METRICS_SCALE_ENABLE=true + ;; --isolated ) OVN_ISOLATED=true ;; -mne | --multi-network-enable ) shift @@ -361,6 +364,7 @@ print_params() { echo "OVN_EX_GW_NETWORK_INTERFACE = $OVN_EX_GW_NETWORK_INTERFACE" echo "OVN_EGRESSIP_HEALTHCHECK_PORT = $OVN_EGRESSIP_HEALTHCHECK_PORT" echo "OVN_DEPLOY_PODS = $OVN_DEPLOY_PODS" + echo "OVN_METRICS_SCALE_ENABLE = $OVN_METRICS_SCALE_ENABLE" echo "OVN_ISOLATED = $OVN_ISOLATED" echo "ENABLE_MULTI_NET = $ENABLE_MULTI_NET" echo "OVN_SEPARATE_CLUSTER_MANAGER = $OVN_SEPARATE_CLUSTER_MANAGER" @@ -510,6 +514,7 @@ set_default_params() { OVN_EGRESSIP_HEALTHCHECK_PORT=${OVN_EGRESSIP_HEALTHCHECK_PORT:-9107} OCI_BIN=${KIND_EXPERIMENTAL_PROVIDER:-docker} OVN_DEPLOY_PODS=${OVN_DEPLOY_PODS:-"ovnkube-master ovnkube-node"} + OVN_METRICS_SCALE_ENABLE=${OVN_METRICS_SCALE_ENABLE:-false} OVN_ISOLATED=${OVN_ISOLATED:-false} OVN_GATEWAY_OPTS="" if [ "$OVN_ISOLATED" == true ]; then @@ -721,7 +726,8 @@ create_ovn_kube_manifests() { --v4-join-subnet="${JOIN_SUBNET_IPV4}" \ --v6-join-subnet="${JOIN_SUBNET_IPV6}" \ --ex-gw-network-interface="${OVN_EX_GW_NETWORK_INTERFACE}" \ - --multi-network-enable=${ENABLE_MULTI_NET} + --multi-network-enable="${ENABLE_MULTI_NET}" \ + --ovnkube-metrics-scale-enable="${OVN_METRICS_SCALE_ENABLE}" popd } diff --git a/dist/images/daemonset.sh b/dist/images/daemonset.sh index d1b248b5c6..507def4d7d 100755 --- a/dist/images/daemonset.sh +++ b/dist/images/daemonset.sh @@ -77,6 +77,7 @@ OVN_HOST_NETWORK_NAMESPACE="" OVN_EX_GW_NETWORK_INTERFACE="" OVNKUBE_NODE_MGMT_PORT_NETDEV="" OVNKUBE_CONFIG_DURATION_ENABLE= +OVNKUBE_METRICS_SCALE_ENABLE= # IN_UPGRADE is true only if called by upgrade-ovn.sh during the upgrade test, # it will render only the parts in ovn-setup.yaml related to RBAC permissions. IN_UPGRADE= @@ -263,6 +264,9 @@ while [ "$1" != "" ]; do --ovnkube-config-duration-enable) OVNKUBE_CONFIG_DURATION_ENABLE=$VALUE ;; + --ovnkube-metrics-scale-enable) + OVNKUBE_METRICS_SCALE_ENABLE=$VALUE + ;; --in-upgrade) IN_UPGRADE=true ;; @@ -405,6 +409,8 @@ ovnkube_node_mgmt_port_netdev=${OVNKUBE_NODE_MGMT_PORT_NETDEV} echo "ovnkube_node_mgmt_port_netdev: ${ovnkube_node_mgmt_port_netdev}" ovnkube_config_duration_enable=${OVNKUBE_CONFIG_DURATION_ENABLE} echo "ovnkube_config_duration_enable: ${ovnkube_config_duration_enable}" +ovnkube_metrics_scale_enable=${OVNKUBE_METRICS_SCALE_ENABLE} +echo "ovnkube_metrics_scale_enable: ${ovnkube_metrics_scale_enable}" ovn_image=${image} \ ovn_image_pull_policy=${image_pull_policy} \ @@ -486,6 +492,7 @@ ovn_image=${image} \ ovnkube_logfile_maxbackups=${ovnkube_logfile_maxbackups} \ ovnkube_logfile_maxage=${ovnkube_logfile_maxage} \ ovnkube_config_duration_enable=${ovnkube_config_duration_enable} \ + ovnkube_metrics_scale_enable=${ovnkube_metrics_scale_enable} \ ovn_acl_logging_rate_limit=${ovn_acl_logging_rate_limit} \ ovn_hybrid_overlay_net_cidr=${ovn_hybrid_overlay_net_cidr} \ ovn_hybrid_overlay_enable=${ovn_hybrid_overlay_enable} \ diff --git a/dist/images/ovnkube.sh b/dist/images/ovnkube.sh index e84db01e64..bf9dec1088 100755 --- a/dist/images/ovnkube.sh +++ b/dist/images/ovnkube.sh @@ -235,6 +235,7 @@ ovnkube_node_mode=${OVNKUBE_NODE_MODE:-"full"} # OVNKUBE_NODE_MGMT_PORT_NETDEV - is the net device to be used for management port ovnkube_node_mgmt_port_netdev=${OVNKUBE_NODE_MGMT_PORT_NETDEV:-} ovnkube_config_duration_enable=${OVNKUBE_CONFIG_DURATION_ENABLE:-false} +ovnkube_metrics_scale_enable=${OVNKUBE_METRICS_SCALE_ENABLE:-false} # OVN_ENCAP_IP - encap IP to be used for OVN traffic on the node ovn_encap_ip=${OVN_ENCAP_IP:-} @@ -993,6 +994,12 @@ ovn-master() { fi echo "ovnkube_config_duration_enable_flag: ${ovnkube_config_duration_enable_flag}" + ovnkube_metrics_scale_enable_flag= + if [[ ${ovnkube_metrics_scale_enable} == "true" ]]; then + ovnkube_metrics_scale_enable_flag="--metrics-enable-scale" + fi + echo "ovnkube_metrics_scale_enable_flag: ${ovnkube_metrics_scale_enable_flag}" + echo "=============== ovn-master ========== MASTER ONLY" /usr/bin/ovnkube \ --init-master ${K8S_NODE} \ @@ -1019,6 +1026,7 @@ ovn-master() { ${egressfirewall_enabled_flag} \ ${egressqos_enabled_flag} \ ${ovnkube_config_duration_enable_flag} \ + ${ovnkube_metrics_scale_enable_flag} \ ${multi_network_enabled_flag} \ --metrics-bind-address ${ovnkube_master_metrics_bind_address} \ --host-network-namespace ${ovn_host_network_namespace} & diff --git a/dist/templates/ovnkube-master.yaml.j2 b/dist/templates/ovnkube-master.yaml.j2 index 5bf07a84e3..1cdd381e57 100644 --- a/dist/templates/ovnkube-master.yaml.j2 +++ b/dist/templates/ovnkube-master.yaml.j2 @@ -164,6 +164,8 @@ spec: value: "{{ ovnkube_logfile_maxage }}" - name: OVNKUBE_CONFIG_DURATION_ENABLE value: "{{ ovnkube_config_duration_enable }}" + - name: OVNKUBE_METRICS_SCALE_ENABLE + value: "{{ ovnkube_metrics_scale_enable }}" - name: OVN_NET_CIDR valueFrom: configMapKeyRef: From 7e5aa3943ec430e0e725e9746e83ea3a4b47a975 Mon Sep 17 00:00:00 2001 From: Ben Pickard Date: Tue, 10 Jan 2023 16:09:52 -0500 Subject: [PATCH 05/12] Add Functions needed to check for static pod Taken from https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/types/pod_update.go Adds the required functions and related code to check if a pod is a static pod Signed-off-by: Ben Pickard Correct START and END comment Signed-off-by: Ben Pickard --- go-controller/pkg/cni/utils.go | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/go-controller/pkg/cni/utils.go b/go-controller/pkg/cni/utils.go index 3fc455ceb1..fcde2e0e88 100644 --- a/go-controller/pkg/cni/utils.go +++ b/go-controller/pkg/cni/utils.go @@ -133,3 +133,28 @@ func PodAnnotation2PodInfo(podAnnotation map[string]string, podNADAnnotation *ut } return podInterfaceInfo, nil } + +//START taken from https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/types/pod_update.go +const ( + ConfigSourceAnnotationKey = "kubernetes.io/config.source" + // ApiserverSource identifies updates from Kubernetes API Server. + ApiserverSource = "api" +) + +// GetPodSource returns the source of the pod based on the annotation. +func GetPodSource(pod *kapi.Pod) (string, error) { + if pod.Annotations != nil { + if source, ok := pod.Annotations[ConfigSourceAnnotationKey]; ok { + return source, nil + } + } + return "", fmt.Errorf("cannot get source of pod %q", pod.UID) +} + +// IsStaticPod returns true if the pod is a static pod. +func IsStaticPod(pod *kapi.Pod) bool { + source, err := GetPodSource(pod) + return err == nil && source != ApiserverSource +} + +//END taken from https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/types/pod_update.go From 6fbb9de44b6408e7fa4911ec146ad00703d65f8c Mon Sep 17 00:00:00 2001 From: Ben Pickard Date: Wed, 11 Jan 2023 14:16:05 -0500 Subject: [PATCH 06/12] Allow static pod creation When a user creates a static pod on a running cluster, it will get stuck in containerCreating due to this check. The uid on the pod will not match the UID on mirror pod in the apiserver. We check the UID on the pod in checkOrUpdatePodUID, and in the case we always exit early, thinking that the pod was deleted and recreated, which we should not do here. We can ignore this check for static pods The OVN controller will not bind the ovs port if the iface-id-ver from the node does not match what we expect in master. This introduces logic to check if pod is static, then uses the UID in the pods metadata instead of the UID in the interface, which will not match what the apiserver has in the case of static pods Signed-off-by: Ben Pickard --- go-controller/pkg/cni/cni.go | 24 ++++++++++++++++-------- go-controller/pkg/cni/helper_linux.go | 1 - go-controller/pkg/cni/utils.go | 12 ++++++------ go-controller/pkg/cni/utils_test.go | 19 ++++++++++--------- 4 files changed, 32 insertions(+), 24 deletions(-) diff --git a/go-controller/pkg/cni/cni.go b/go-controller/pkg/cni/cni.go index 706d398d36..57ec7e116a 100644 --- a/go-controller/pkg/cni/cni.go +++ b/go-controller/pkg/cni/cni.go @@ -2,10 +2,12 @@ package cni import ( "fmt" + "net" + + kapi "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" "k8s.io/klog/v2" utilnet "k8s.io/utils/net" - "net" current "github.com/containernetworking/cni/pkg/types/100" "github.com/k8snetworkplumbingwg/govdpa/pkg/kvdpa" @@ -80,11 +82,17 @@ func (pr *PodRequest) String() string { // and the best we can do is use the given UID for the duration of the request. // But if the existing UID is valid and does not match the given UID then the // sandbox request is for a different pod instance and should be terminated. -func (pr *PodRequest) checkOrUpdatePodUID(podUID string) error { - if pr.PodUID == "" { - // Runtime didn't pass UID, use the one we got from the pod object - pr.PodUID = podUID - } else if podUID != pr.PodUID { +// Static pod UID is a hash of the pod itself that does not match +// the UID of the mirror kubelet creates on the api /server. +// We will use the UID of the mirror. +// The hash is annotated in the mirror pod (kubernetes.io/config.hash) +// and we could match against it, but let's avoid that for now as it is not +// a published standard. +func (pr *PodRequest) checkOrUpdatePodUID(pod *kapi.Pod) error { + if pr.PodUID == "" || IsStaticPod(pod) { + // Runtime didn't pass UID, or the pod is a static pod, use the one we got from the pod object + pr.PodUID = string(pod.UID) + } else if string(pod.UID) != pr.PodUID { // Exit early if the pod was deleted and recreated already return fmt.Errorf("pod deleted before sandbox %v operation began", pr.Command) } @@ -142,12 +150,12 @@ func (pr *PodRequest) cmdAdd(kubeAuth *KubeAPIAuth, clientset *ClientSet, useOVS } // Get the IP address and MAC address of the pod // for DPU, ensure connection-details is present - podUID, annotations, podNADAnnotation, err := GetPodAnnotations(pr.ctx, clientset, namespace, podName, + pod, annotations, podNADAnnotation, err := GetPodWithAnnotations(pr.ctx, clientset, namespace, podName, pr.nadName, annotCondFn) if err != nil { return nil, fmt.Errorf("failed to get pod annotation: %v", err) } - if err := pr.checkOrUpdatePodUID(podUID); err != nil { + if err = pr.checkOrUpdatePodUID(pod); err != nil { return nil, err } podInterfaceInfo, err := PodAnnotation2PodInfo(annotations, podNADAnnotation, useOVSExternalIDs, pr.PodUID, vfNetdevName, diff --git a/go-controller/pkg/cni/helper_linux.go b/go-controller/pkg/cni/helper_linux.go index 7355bc89f9..f3ac2ebf33 100644 --- a/go-controller/pkg/cni/helper_linux.go +++ b/go-controller/pkg/cni/helper_linux.go @@ -354,7 +354,6 @@ func ConfigureOVS(ctx context.Context, namespace, podName, hostIfaceName string, ifaceID = util.GetSecondaryNetworkIfaceId(namespace, podName, ifInfo.NADName) } initialPodUID := ifInfo.PodUID - ipStrs := make([]string, len(ifInfo.IPs)) for i, ip := range ifInfo.IPs { ipStrs[i] = ip.String() diff --git a/go-controller/pkg/cni/utils.go b/go-controller/pkg/cni/utils.go index fcde2e0e88..06c3c8b1e8 100644 --- a/go-controller/pkg/cni/utils.go +++ b/go-controller/pkg/cni/utils.go @@ -60,8 +60,8 @@ func (c *ClientSet) getPod(namespace, name string) (*kapi.Pod, error) { } // GetPodAnnotations obtains the pod UID and annotation from the cache or apiserver -func GetPodAnnotations(ctx context.Context, getter PodInfoGetter, - namespace, name, nadName string, annotCond podAnnotWaitCond) (string, map[string]string, *util.PodAnnotation, error) { +func GetPodWithAnnotations(ctx context.Context, getter PodInfoGetter, + namespace, name, nadName string, annotCond podAnnotWaitCond) (*kapi.Pod, map[string]string, *util.PodAnnotation, error) { var notFoundCount uint for { @@ -71,23 +71,23 @@ func GetPodAnnotations(ctx context.Context, getter PodInfoGetter, if ctx.Err() == context.Canceled { detail = "canceled while" } - return "", nil, nil, fmt.Errorf("%s waiting for annotations: %w", detail, ctx.Err()) + return nil, nil, nil, fmt.Errorf("%s waiting for annotations: %w", detail, ctx.Err()) default: pod, err := getter.getPod(namespace, name) if err != nil { if !apierrors.IsNotFound(err) { - return "", nil, nil, fmt.Errorf("failed to get pod for annotations: %v", err) + return nil, nil, nil, fmt.Errorf("failed to get pod for annotations: %v", err) } // Allow up to 1 second for pod to be found notFoundCount++ if notFoundCount >= 5 { - return "", nil, nil, fmt.Errorf("timed out waiting for pod after 1s: %v", err) + return nil, nil, nil, fmt.Errorf("timed out waiting for pod after 1s: %v", err) } // drop through to try again } else if pod != nil { podNADAnnotation, ready := annotCond(pod.Annotations, nadName) if ready { - return string(pod.UID), pod.Annotations, podNADAnnotation, nil + return pod, pod.Annotations, podNADAnnotation, nil } } diff --git a/go-controller/pkg/cni/utils_test.go b/go-controller/pkg/cni/utils_test.go index 2875aa443e..aa40fa922d 100644 --- a/go-controller/pkg/cni/utils_test.go +++ b/go-controller/pkg/cni/utils_test.go @@ -3,10 +3,11 @@ package cni import ( "context" "fmt" + "time" + . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" "github.com/stretchr/testify/mock" - "time" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" @@ -113,7 +114,7 @@ var _ = Describe("CNI Utils tests", func() { }) }) - Context("GetPodAnnotations", func() { + Context("GetPodWithAnnotations", func() { var podNamespaceLister mocks.PodNamespaceLister var pod *v1.Pod @@ -143,10 +144,10 @@ var _ = Describe("CNI Utils tests", func() { clientset := newFakeClientSet(pod, &podNamespaceLister) podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(pod, nil) - uid, annot, _, err := GetPodAnnotations(ctx, clientset, namespace, podName, ovntypes.DefaultNetworkName, cond) + returnedPod, annot, _, err := GetPodWithAnnotations(ctx, clientset, namespace, podName, ovntypes.DefaultNetworkName, cond) Expect(err).ToNot(HaveOccurred()) Expect(annot).To(Equal(podAnnot)) - Expect(uid).To(Equal(string(pod.UID))) + Expect(string(returnedPod.UID)).To(Equal(string(pod.UID))) }) It("Returns with Error if context is canceled", func() { @@ -164,7 +165,7 @@ var _ = Describe("CNI Utils tests", func() { clientset := newFakeClientSet(pod, &podNamespaceLister) podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(pod, nil) - _, _, _, err := GetPodAnnotations(ctx, clientset, namespace, podName, ovntypes.DefaultNetworkName, cond) + _, _, _, err := GetPodWithAnnotations(ctx, clientset, namespace, podName, ovntypes.DefaultNetworkName, cond) Expect(err).To(HaveOccurred()) }) @@ -184,7 +185,7 @@ var _ = Describe("CNI Utils tests", func() { clientset := newFakeClientSet(pod, &podNamespaceLister) podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(pod, nil) - _, _, _, err := GetPodAnnotations(ctx, clientset, namespace, podName, ovntypes.DefaultNetworkName, cond) + _, _, _, err := GetPodWithAnnotations(ctx, clientset, namespace, podName, ovntypes.DefaultNetworkName, cond) Expect(err).ToNot(HaveOccurred()) }) @@ -199,7 +200,7 @@ var _ = Describe("CNI Utils tests", func() { clientset := newFakeClientSet(pod, &podNamespaceLister) podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(nil, fmt.Errorf("failed to list pods")) - _, _, _, err := GetPodAnnotations(ctx, clientset, namespace, podName, ovntypes.DefaultNetworkName, cond) + _, _, _, err := GetPodWithAnnotations(ctx, clientset, namespace, podName, ovntypes.DefaultNetworkName, cond) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring("failed to list pods")) }) @@ -220,7 +221,7 @@ var _ = Describe("CNI Utils tests", func() { clientset := newFakeClientSet(pod, &podNamespaceLister) podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(nil, errors.NewNotFound(v1.Resource("pod"), name)) - _, _, _, err := GetPodAnnotations(ctx, clientset, namespace, podName, ovntypes.DefaultNetworkName, cond) + _, _, _, err := GetPodWithAnnotations(ctx, clientset, namespace, podName, ovntypes.DefaultNetworkName, cond) Expect(err).ToNot(HaveOccurred()) }) @@ -235,7 +236,7 @@ var _ = Describe("CNI Utils tests", func() { clientset := newFakeClientSet(nil, &podNamespaceLister) podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(nil, errors.NewNotFound(v1.Resource("pod"), name)) - _, _, _, err := GetPodAnnotations(ctx, clientset, namespace, podName, ovntypes.DefaultNetworkName, cond) + _, _, _, err := GetPodWithAnnotations(ctx, clientset, namespace, podName, ovntypes.DefaultNetworkName, cond) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring("timed out waiting for pod after 1s")) }) From aa2520578e6d6bc801e7d271d8645cdbb5331a48 Mon Sep 17 00:00:00 2001 From: Ben Pickard Date: Mon, 23 Jan 2023 18:21:29 -0500 Subject: [PATCH 07/12] Add e2e test for static pods Add e2e test for creating static pods on cluster Signed-off-by: Ben Pickard --- test/e2e/static_pods.go | 113 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 test/e2e/static_pods.go diff --git a/test/e2e/static_pods.go b/test/e2e/static_pods.go new file mode 100644 index 0000000000..767a28e000 --- /dev/null +++ b/test/e2e/static_pods.go @@ -0,0 +1,113 @@ +package e2e + +import ( + "fmt" + "io/ioutil" + "os" + "time" + + "github.com/onsi/ginkgo" + "github.com/onsi/gomega" + v1 "k8s.io/api/core/v1" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/kubernetes/test/e2e/framework" + e2enode "k8s.io/kubernetes/test/e2e/framework/node" + e2epod "k8s.io/kubernetes/test/e2e/framework/pod" +) + +// pulled from https://github.com/kubernetes/kubernetes/blob/v1.26.2/test/e2e/framework/pod/wait.go#L468 +// had to modify function due to restart policy on static pods being set to always, which caused function to fail +func waitForPodRunningInNamespaceTimeout(c clientset.Interface, podName, namespace string, timeout time.Duration) error { + return e2epod.WaitForPodCondition(c, namespace, podName, fmt.Sprintf("%s", v1.PodRunning), timeout, func(pod *v1.Pod) (bool, error) { + switch pod.Status.Phase { + case v1.PodRunning: + ginkgo.By("Saw pod running") + return true, nil + default: + return false, nil + } + }) +} + +func createStaticPod(f *framework.Framework, nodeName string, podYaml string) { + //create file + var podFile = "static-pod.yaml" + if err := ioutil.WriteFile(podFile, []byte(podYaml), 0644); err != nil { + framework.Failf("Unable to write static-pod.yaml to disk: %v", err) + } + defer func() { + if err := os.Remove(podFile); err != nil { + framework.Logf("Unable to remove the static-pod.yaml from disk: %v", err) + } + }() + var dst = fmt.Sprintf("%s:/etc/kubernetes/manifests/%s", nodeName, podFile) + cmd := []string{"docker", "cp", podFile, dst} + framework.Logf("Running command %v", cmd) + _, err := runCommand(cmd...) + if err != nil { + framework.Failf("failed to copy pod file to node %s", nodeName) + } + +} + +func removeStaticPodFile(nodeName string, podFile string) { + cmd := []string{"docker", "exec", nodeName, "bash", "-c", "rm /etc/kubernetes/manifests/static-pod.yaml"} + framework.Logf("Running command %v", cmd) + _, err := runCommand(cmd...) + if err != nil { + framework.Failf("failed to remove pod file from node %s", nodeName) + } + +} + +//This test does the following +//Applies a static-pod.yaml file to a nodes /etc/kubernetes/manifest dir +//Expects the static pod to succeed +var _ = ginkgo.Describe("Creating a static pod on a node", func() { + + const ( + podName string = "static-pod" + podFile string = "static-pod.yaml" + agnhostImage string = "k8s.gcr.io/e2e-test-images/agnhost:2.26" + ) + + f := wrappedTestFramework("staticpods") + + var cs clientset.Interface + + ginkgo.BeforeEach(func() { + cs = f.ClientSet + }) + + ginkgo.It("Should successfully create then remove a static pod", func() { + nodes, err := e2enode.GetBoundedReadySchedulableNodes(cs, 3) + framework.ExpectNoError(err) + if len(nodes.Items) < 1 { + framework.Failf("Test requires 1 Ready node, but there are none") + } + nodeName := nodes.Items[0].Name + podName := fmt.Sprintf("static-pod-%s", nodeName) + + ginkgo.By("copying a pod.yaml file into the /etc/kubernetes/manifests dir of a node") + framework.Logf("creating %s on node %s", podName, nodeName) + var staticPodYaml = fmt.Sprintf(`apiVersion: v1 +kind: Pod +metadata: + name: static-pod + namespace: %s +spec: + containers: + - name: web + image: %s + command: ["/bin/bash", "-c", "trap : TERM INT; sleep infinity & wait"] +`, f.Namespace.Name, agnhostImage) + createStaticPod(f, nodeName, staticPodYaml) + err = waitForPodRunningInNamespaceTimeout(f.ClientSet, podName, f.Namespace.Name, time.Second*30) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ginkgo.By("Removing the pod file from the nodes /etc/kubernetes/manifests") + framework.Logf("Removing %s from %s", podName, nodeName) + removeStaticPodFile(nodeName, podFile) + err = e2epod.WaitForPodNotFoundInNamespace(f.ClientSet, podName, f.Namespace.Name, time.Second*30) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) +}) From e84b60bc4bdb24f41ccf2a6af2782a447141ccc3 Mon Sep 17 00:00:00 2001 From: Ben Pickard Date: Wed, 25 Jan 2023 16:22:45 -0500 Subject: [PATCH 08/12] Fix node_dpu_test Node dpu test is currently mocking podNameSpaceLister incorrectly. podNameSpaceLister is expecting a pointer to a pod, not a real pod object. This fix corrects this Signed-off-by: Ben Pickard --- .../base_node_network_controller_dpu_test.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/go-controller/pkg/node/base_node_network_controller_dpu_test.go b/go-controller/pkg/node/base_node_network_controller_dpu_test.go index b7a511288f..d6d34be39a 100644 --- a/go-controller/pkg/node/base_node_network_controller_dpu_test.go +++ b/go-controller/pkg/node/base_node_network_controller_dpu_test.go @@ -175,7 +175,7 @@ var _ = Describe("Node DPU tests", func() { It("Fails if dpu.connection-details Pod annotation is not present", func() { pod.Annotations = map[string]string{} - podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(pod, nil) + podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(&pod, nil) err := dnnc.addRepPort(&pod, vfRep, ifInfo, clientset) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring("failed to get dpu annotation")) @@ -198,7 +198,7 @@ var _ = Describe("Node DPU tests", func() { Cmd: genOVSDelPortCmd("pf0vf9"), }) - podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(pod, nil) + podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(&pod, nil) // call addRepPort() err := dnnc.addRepPort(&pod, vfRep, ifInfo, clientset) @@ -226,7 +226,7 @@ var _ = Describe("Node DPU tests", func() { execMock.AddFakeCmd(&ovntest.ExpectedCmd{ Cmd: genOVSDelPortCmd("pf0vf9"), }) - podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(pod, nil) + podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(&pod, nil) // call addRepPort() err := dnnc.addRepPort(&pod, vfRep, ifInfo, clientset) @@ -282,7 +282,7 @@ var _ = Describe("Node DPU tests", func() { Cmd: genOVSDelPortCmd("pf0vf9"), }) - podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(pod, nil) + podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(&pod, nil) err := dnnc.addRepPort(&pod, vfRep, ifInfo, clientset) Expect(err).To(HaveOccurred()) @@ -298,7 +298,7 @@ var _ = Describe("Node DPU tests", func() { Cmd: genOVSDelPortCmd("pf0vf9"), }) - podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(pod, nil) + podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(&pod, nil) err := dnnc.addRepPort(&pod, vfRep, ifInfo, clientset) Expect(err).To(HaveOccurred()) @@ -315,7 +315,7 @@ var _ = Describe("Node DPU tests", func() { Cmd: genOVSDelPortCmd("pf0vf9"), }) - podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(pod, nil) + podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(&pod, nil) err := dnnc.addRepPort(&pod, vfRep, ifInfo, clientset) Expect(err).To(HaveOccurred()) @@ -337,7 +337,7 @@ var _ = Describe("Node DPU tests", func() { Expect(err).ToNot(HaveOccurred()) kubeMock.On("UpdatePod", cpod).Return(nil) - podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(pod, nil) + podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(&pod, nil) err = dnnc.addRepPort(&pod, vfRep, ifInfo, clientset) Expect(err).ToNot(HaveOccurred()) @@ -363,7 +363,7 @@ var _ = Describe("Node DPU tests", func() { Cmd: genOVSDelPortCmd("pf0vf9"), }) - podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(pod, nil) + podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(&pod, nil) err = dnnc.addRepPort(&pod, vfRep, ifInfo, clientset) Expect(err).To(HaveOccurred()) From dafe82b98c33a66bdb1e97ea26a4fa1cd146a141 Mon Sep 17 00:00:00 2001 From: Nadia Pinaeva Date: Mon, 6 Mar 2023 18:11:16 +0100 Subject: [PATCH 09/12] Add egress firewall external id to make name+externalIDs unique even when the acl name is cropped. That happens when namespace name is longer than 43 symbols. Signed-off-by: Nadia Pinaeva --- go-controller/pkg/ovn/egressfirewall.go | 8 +- go-controller/pkg/ovn/egressfirewall_test.go | 171 +++++++++++++++++-- 2 files changed, 161 insertions(+), 18 deletions(-) diff --git a/go-controller/pkg/ovn/egressfirewall.go b/go-controller/pkg/ovn/egressfirewall.go index 3c001861b6..9fd532e360 100644 --- a/go-controller/pkg/ovn/egressfirewall.go +++ b/go-controller/pkg/ovn/egressfirewall.go @@ -28,7 +28,8 @@ const ( egressFirewallAppliedCorrectly = "EgressFirewall Rules applied" egressFirewallAddError = "EgressFirewall Rules not correctly added" // egressFirewallACLExtIdKey external ID key for egress firewall ACLs - egressFirewallACLExtIdKey = "egressFirewall" + egressFirewallACLExtIdKey = "egressFirewall" + egressFirewallACLPriorityKey = "priority" ) type egressFirewall struct { @@ -403,7 +404,10 @@ func (oc *DefaultNetworkController) createEgressFirewallRules(priority int, matc aclLogging, // since egressFirewall has direction to-lport, set type to ingress lportIngress, - map[string]string{egressFirewallACLExtIdKey: externalID}, + map[string]string{ + egressFirewallACLExtIdKey: externalID, + egressFirewallACLPriorityKey: fmt.Sprintf("%d", priority), + }, ) ops, err := libovsdbops.CreateOrUpdateACLsOps(oc.nbClient, nil, egressFirewallACL) if err != nil { diff --git a/go-controller/pkg/ovn/egressfirewall_test.go b/go-controller/pkg/ovn/egressfirewall_test.go index 503eac974a..3b1855b44c 100644 --- a/go-controller/pkg/ovn/egressfirewall_test.go +++ b/go-controller/pkg/ovn/egressfirewall_test.go @@ -202,8 +202,26 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { // Both ACLs will be removed from the node switch nodeSwitch.ACLs = nil + // keepACL will be re-created with the new external ID + asHash, _ := getNsAddrSetHashNames(namespace1.Name) + newKeepACL := libovsdbops.BuildACL( + buildEgressFwAclName(namespace1.Name, t.EgressFirewallStartPriority), + nbdb.ACLDirectionToLport, + t.EgressFirewallStartPriority, + "(ip4.dst == 1.2.3.4/23) && ip4.src == $"+asHash, + nbdb.ACLActionAllow, + t.OvnACLLoggingMeter, + "", + false, + map[string]string{ + egressFirewallACLExtIdKey: "namespace1", + egressFirewallACLPriorityKey: fmt.Sprintf("%d", t.EgressFirewallStartPriority), + }, + nil, + ) + newKeepACL.UUID = "newKeepACL-UUID" // keepACL will be added to the clusterPortGroup - clusterPortGroup.ACLs = []string{keepACL.UUID} + clusterPortGroup.ACLs = []string{newKeepACL.UUID} // Direction of both ACLs will be converted to keepACL.Direction = nbdb.ACLDirectionToLport @@ -211,10 +229,6 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { keepACL.Name = &newName // check severity was reset from default to nil keepACL.Severity = nil - // subnet exclusion will be deleted - asHash, _ := getNsAddrSetHashNames(namespace1.Name) - keepACL.Match = "(ip4.dst == 1.2.3.4/23) && ip4.src == $" + asHash - // purgeACL ACL will be deleted when test server starts deleting dereferenced ACLs // for now we need to update its fields, since it is present in the db purgeACL.Direction = nbdb.ACLDirectionToLport @@ -225,6 +239,7 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { expectedDatabaseState := []libovsdb.TestData{ otherACL, purgeACL, + newKeepACL, keepACL, nodeSwitch, joinSwitch, @@ -290,7 +305,10 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { t.OvnACLLoggingMeter, "", false, - map[string]string{egressFirewallACLExtIdKey: "namespace1"}, + map[string]string{ + egressFirewallACLExtIdKey: "namespace1", + egressFirewallACLPriorityKey: fmt.Sprintf("%d", t.EgressFirewallStartPriority), + }, nil, ) ipv4ACL.UUID = "ipv4ACL-UUID" @@ -358,7 +376,10 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { t.OvnACLLoggingMeter, "", false, - map[string]string{egressFirewallACLExtIdKey: "namespace1"}, + map[string]string{ + egressFirewallACLExtIdKey: "namespace1", + egressFirewallACLPriorityKey: fmt.Sprintf("%d", t.EgressFirewallStartPriority), + }, nil, ) ipv6ACL.UUID = "ipv6ACL-UUID" @@ -435,7 +456,10 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { t.OvnACLLoggingMeter, "", false, - map[string]string{egressFirewallACLExtIdKey: "namespace1"}, + map[string]string{ + egressFirewallACLExtIdKey: "namespace1", + egressFirewallACLPriorityKey: fmt.Sprintf("%d", t.EgressFirewallStartPriority), + }, nil, ) udpACL.UUID = "udpACL-UUID" @@ -510,7 +534,10 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { t.OvnACLLoggingMeter, "", false, - map[string]string{egressFirewallACLExtIdKey: "namespace1"}, + map[string]string{ + egressFirewallACLExtIdKey: "namespace1", + egressFirewallACLPriorityKey: fmt.Sprintf("%d", t.EgressFirewallStartPriority), + }, nil, ) ipv4ACL.UUID = "ipv4ACL-UUID" @@ -593,7 +620,10 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { t.OvnACLLoggingMeter, "", false, - map[string]string{egressFirewallACLExtIdKey: "namespace1"}, + map[string]string{ + egressFirewallACLExtIdKey: "namespace1", + egressFirewallACLPriorityKey: fmt.Sprintf("%d", t.EgressFirewallStartPriority), + }, nil, ) ipv4ACL.UUID = "ipv4ACL-UUID" @@ -819,7 +849,10 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { t.OvnACLLoggingMeter, "", false, - map[string]string{"egressFirewall": "namespace1"}, + map[string]string{ + egressFirewallACLExtIdKey: "namespace1", + egressFirewallACLPriorityKey: fmt.Sprintf("%d", t.EgressFirewallStartPriority), + }, nil, ) ipv4ACL.UUID = "ipv4ACL-UUID" @@ -898,7 +931,10 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { t.OvnACLLoggingMeter, "", false, - map[string]string{egressFirewallACLExtIdKey: "namespace1"}, + map[string]string{ + egressFirewallACLExtIdKey: "namespace1", + egressFirewallACLPriorityKey: fmt.Sprintf("%d", t.EgressFirewallStartPriority), + }, nil, ) ipv4ACL.UUID = "ipv4ACL-UUID" @@ -1006,7 +1042,10 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { t.OvnACLLoggingMeter, "", false, - map[string]string{egressFirewallACLExtIdKey: "namespace1"}, + map[string]string{ + egressFirewallACLExtIdKey: "namespace1", + egressFirewallACLPriorityKey: fmt.Sprintf("%d", t.EgressFirewallStartPriority), + }, nil, ) ipv4ACL.UUID = "ipv4ACL-UUID" @@ -1109,7 +1148,10 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { t.OvnACLLoggingMeter, "", false, - map[string]string{egressFirewallACLExtIdKey: "namespace1"}, + map[string]string{ + egressFirewallACLExtIdKey: "namespace1", + egressFirewallACLPriorityKey: fmt.Sprintf("%d", t.EgressFirewallStartPriority), + }, nil, ) ipv4ACL.UUID = "ipv4ACL-UUID" @@ -1197,7 +1239,10 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { t.OvnACLLoggingMeter, "", false, - map[string]string{"egressFirewall": namespace1.Name}, + map[string]string{ + egressFirewallACLExtIdKey: "namespace1", + egressFirewallACLPriorityKey: fmt.Sprintf("%d", t.EgressFirewallStartPriority), + }, nil, ) ipv4ACL.UUID = "ipv4ACL-UUID" @@ -1267,7 +1312,10 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { t.OvnACLLoggingMeter, "", false, - map[string]string{egressFirewallACLExtIdKey: "namespace1"}, + map[string]string{ + egressFirewallACLExtIdKey: "namespace1", + egressFirewallACLPriorityKey: fmt.Sprintf("%d", t.EgressFirewallStartPriority), + }, nil, ) acl.UUID = "acl-UUID" @@ -1282,6 +1330,97 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { err := app.Run([]string{app.Name}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) + ginkgo.It(fmt.Sprintf("correctly creates an egressfirewall for namespace name > 43 symbols, gateway mode %s", gwMode), func() { + app.Action = func(ctx *cli.Context) error { + // 52 characters namespace + namespace1 := *newNamespace("abcdefghigklmnopqrstuvwxyzabcdefghigklmnopqrstuvwxyz") + egressFirewall := newEgressFirewallObject("default", namespace1.Name, []egressfirewallapi.EgressFirewallRule{ + { + Type: "Allow", + To: egressfirewallapi.EgressFirewallDestination{ + CIDRSelector: "1.2.3.5/23", + }, + }, + { + Type: "Allow", + To: egressfirewallapi.EgressFirewallDestination{ + CIDRSelector: "2.2.3.5/23", + }, + }, + }) + + fakeOVN.startWithDBSetup(dbSetup, + &egressfirewallapi.EgressFirewallList{ + Items: []egressfirewallapi.EgressFirewall{ + *egressFirewall, + }, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespace1, + }, + }) + + err := fakeOVN.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOVN.controller.WatchEgressFirewall() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + asHash, _ := getNsAddrSetHashNames(namespace1.Name) + ipv4ACL1 := libovsdbops.BuildACL( + buildEgressFwAclName(namespace1.Name, t.EgressFirewallStartPriority), + nbdb.ACLDirectionToLport, + t.EgressFirewallStartPriority, + "(ip4.dst == 1.2.3.5/23) && ip4.src == $"+asHash, + nbdb.ACLActionAllow, + t.OvnACLLoggingMeter, + "", + false, + map[string]string{ + egressFirewallACLExtIdKey: namespace1.Name, + egressFirewallACLPriorityKey: fmt.Sprintf("%d", t.EgressFirewallStartPriority), + }, + nil, + ) + ipv4ACL1.UUID = "ipv4ACL1-UUID" + ipv4ACL2 := libovsdbops.BuildACL( + buildEgressFwAclName(namespace1.Name, t.EgressFirewallStartPriority-1), + nbdb.ACLDirectionToLport, + t.EgressFirewallStartPriority-1, + "(ip4.dst == 2.2.3.5/23) && ip4.src == $"+asHash, + nbdb.ACLActionAllow, + t.OvnACLLoggingMeter, + "", + false, + map[string]string{ + egressFirewallACLExtIdKey: namespace1.Name, + egressFirewallACLPriorityKey: fmt.Sprintf("%d", t.EgressFirewallStartPriority-1), + }, + nil, + ) + ipv4ACL2.UUID = "ipv4ACL2-UUID" + + // new ACL will be added to the port group + clusterPortGroup.ACLs = []string{ipv4ACL1.UUID, ipv4ACL2.UUID} + expectedDatabaseState := append(initialData, ipv4ACL1, ipv4ACL2) + + gomega.Eventually(fakeOVN.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + err = fakeOVN.fakeClient.EgressFirewallClient.K8sV1().EgressFirewalls(egressFirewall.Namespace).Delete(context.TODO(), egressFirewall.Name, *metav1.NewDeleteOptions(0)) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // ACL should be removed from the port group egfw is deleted + clusterPortGroup.ACLs = []string{} + // this ACL will be deleted when test server starts deleting dereferenced ACLs + expectedDatabaseState = append(initialData, ipv4ACL1, ipv4ACL2) + gomega.Eventually(fakeOVN.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) } }) }) From d996d0e8d28e72ee40b5ff29afb88965cf2b4f8a Mon Sep 17 00:00:00 2001 From: Surya Seetharaman Date: Mon, 6 Mar 2023 23:28:57 +0100 Subject: [PATCH 10/12] Add test to showcase syncEgressFirewall isn't truncating ACL names This commit adds a test to showcase that since syncEgressFirewall isn't calling libovsdbops.BuildACL directly, we are not truncating ACL names. Note that we really need https://github.com/ovn-org/libovsdb/issues/338 for our test server to start screaming for long names. Signed-off-by: Surya Seetharaman --- go-controller/pkg/ovn/egressfirewall_test.go | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/go-controller/pkg/ovn/egressfirewall_test.go b/go-controller/pkg/ovn/egressfirewall_test.go index 3b1855b44c..3faf2979ae 100644 --- a/go-controller/pkg/ovn/egressfirewall_test.go +++ b/go-controller/pkg/ovn/egressfirewall_test.go @@ -1406,6 +1406,20 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { gomega.Eventually(fakeOVN.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + // NOTE1: syncEgressFirewall is not calling libovsdbops.BuildACL and directly calls CreateOrUpdateACLs + // it doesn't truncate long names for acls if they are over 63 and we run into errors: + // E0228 09:54:13.167495 1 factory.go:567] Failed (will retry) in processExisting [0xc00176a000]: + // unable to update ACL information (direction and logging) during resync operation, err: error in transact + // with ops constraint violation: "egressFirewall_allow-traffic-apache-server-on-lbdns-node-run1-1_9999" + // length 68 is greater than maximum allowed length 63]: 1 ovsdb operations failed + // NOTE2: This is not caught by testing because our test server + // is not smart enough. See https://github.com/ovn-org/libovsdb/issues/338 + err = fakeOVN.controller.syncEgressFirewall([]interface{}{*egressFirewall}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + *ipv4ACL1.Name = buildEgressFwAclName(namespace1.Name, t.EgressFirewallStartPriority) // we end up resetting the name to long value + *ipv4ACL2.Name = buildEgressFwAclName(namespace1.Name, t.EgressFirewallStartPriority-1) // we end up resetting the name to long value + gomega.Eventually(fakeOVN.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + err = fakeOVN.fakeClient.EgressFirewallClient.K8sV1().EgressFirewalls(egressFirewall.Namespace).Delete(context.TODO(), egressFirewall.Name, *metav1.NewDeleteOptions(0)) gomega.Expect(err).NotTo(gomega.HaveOccurred()) From 7bfe50e5bccb3e88f5ce806a66b71ceda39fcf46 Mon Sep 17 00:00:00 2001 From: Surya Seetharaman Date: Mon, 6 Mar 2023 23:36:04 +0100 Subject: [PATCH 11/12] Fix syncEgressFirewall to truncate ACL names This commit ensures we truncate names as a precaution also in CreateOrUpdateACLsOps so that our bases are covered since not all code snippets call BuildACL directly Signed-off-by: Surya Seetharaman --- go-controller/pkg/libovsdbops/acl.go | 5 +++++ go-controller/pkg/ovn/egressfirewall_test.go | 12 ++---------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/go-controller/pkg/libovsdbops/acl.go b/go-controller/pkg/libovsdbops/acl.go index 8a8b2de7a5..ce32fd78d5 100644 --- a/go-controller/pkg/libovsdbops/acl.go +++ b/go-controller/pkg/libovsdbops/acl.go @@ -102,6 +102,11 @@ func CreateOrUpdateACLsOps(nbClient libovsdbclient.Client, ops []libovsdb.Operat for i := range acls { // can't use i in the predicate, for loop replaces it in-memory acl := acls[i] + // ensure names are truncated (let's cover our bases from snippets that don't call BuildACL and call this directly) + if acl.Name != nil { + // node ACLs won't have names set + *acl.Name = fmt.Sprintf("%.63s", *acl.Name) + } opModel := operationModel{ Model: acl, ModelPredicate: func(item *nbdb.ACL) bool { return isEquivalentACL(item, acl) }, diff --git a/go-controller/pkg/ovn/egressfirewall_test.go b/go-controller/pkg/ovn/egressfirewall_test.go index 3faf2979ae..f61cd04ed7 100644 --- a/go-controller/pkg/ovn/egressfirewall_test.go +++ b/go-controller/pkg/ovn/egressfirewall_test.go @@ -1406,18 +1406,10 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { gomega.Eventually(fakeOVN.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - // NOTE1: syncEgressFirewall is not calling libovsdbops.BuildACL and directly calls CreateOrUpdateACLs - // it doesn't truncate long names for acls if they are over 63 and we run into errors: - // E0228 09:54:13.167495 1 factory.go:567] Failed (will retry) in processExisting [0xc00176a000]: - // unable to update ACL information (direction and logging) during resync operation, err: error in transact - // with ops constraint violation: "egressFirewall_allow-traffic-apache-server-on-lbdns-node-run1-1_9999" - // length 68 is greater than maximum allowed length 63]: 1 ovsdb operations failed - // NOTE2: This is not caught by testing because our test server - // is not smart enough. See https://github.com/ovn-org/libovsdb/issues/338 + // NOTE: syncEgressFirewall is not calling libovsdbops.BuildACL and directly calls CreateOrUpdateACLs + // This part of test ensures syncEgressFirewall code path is tested well and that we truncate the ACL names correctly err = fakeOVN.controller.syncEgressFirewall([]interface{}{*egressFirewall}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - *ipv4ACL1.Name = buildEgressFwAclName(namespace1.Name, t.EgressFirewallStartPriority) // we end up resetting the name to long value - *ipv4ACL2.Name = buildEgressFwAclName(namespace1.Name, t.EgressFirewallStartPriority-1) // we end up resetting the name to long value gomega.Eventually(fakeOVN.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) err = fakeOVN.fakeClient.EgressFirewallClient.K8sV1().EgressFirewalls(egressFirewall.Namespace).Delete(context.TODO(), egressFirewall.Name, *metav1.NewDeleteOptions(0)) From 935bc5570cb6661beb58e98288cae33c13bd6a8e Mon Sep 17 00:00:00 2001 From: Surya Seetharaman Date: Tue, 7 Mar 2023 12:56:15 +0100 Subject: [PATCH 12/12] Don't recreate clusterPGs and clusterRtrPGs unless needed In SetupMaster, we always call CreateOrUpdatePortGroupsOps with empty ACLs and PGs for the cluster-wide port group and cluster-wide-router-PG. This is disruptive during upgrades since momentarily all efw ACLs and multicast ACLs will be wiped out. This commit changes this to first check if the PG already exists, if then no need to do anything. Each of those features are responsible for ensuring ACLs, Ports are good on those PGs they own. NOTE: This bug was an issue for multicast and started being an issue for egf from https://github.com/ovn-org/ovn-kubernetes/pull/3338/commits/bd29f417b43c257897f27badc64af38df78b99bd Before that we didn't have ACLs on cluster wide PG. Signed-off-by: Surya Seetharaman --- go-controller/pkg/ovn/master.go | 45 ++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/go-controller/pkg/ovn/master.go b/go-controller/pkg/ovn/master.go index 248fed6b18..b37833f6a3 100644 --- a/go-controller/pkg/ovn/master.go +++ b/go-controller/pkg/ovn/master.go @@ -14,6 +14,7 @@ import ( "k8s.io/klog/v2" utilnet "k8s.io/utils/net" + libovsdbclient "github.com/ovn-org/libovsdb/client" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdbops" @@ -118,23 +119,43 @@ func (oc *DefaultNetworkController) SetupMaster(existingNodeNames []string) erro } oc.defaultCOPPUUID = *(logicalRouter.Copp) - // Create a cluster-wide port group that all logical switch ports are part of - pg := libovsdbops.BuildPortGroup(types.ClusterPortGroupName, types.ClusterPortGroupName, nil, nil) - err = libovsdbops.CreateOrUpdatePortGroups(oc.nbClient, pg) - if err != nil { - klog.Errorf("Failed to create cluster port group: %v", err) + pg := &nbdb.PortGroup{ + Name: types.ClusterPortGroupName, + } + pg, err = libovsdbops.GetPortGroup(oc.nbClient, pg) + if err != nil && err != libovsdbclient.ErrNotFound { return err } + if pg == nil { + // we didn't find an existing clusterPG, let's create a new empty PG (fresh cluster install) + // Create a cluster-wide port group that all logical switch ports are part of + pg := libovsdbops.BuildPortGroup(types.ClusterPortGroupName, types.ClusterPortGroupName, nil, nil) + err = libovsdbops.CreateOrUpdatePortGroups(oc.nbClient, pg) + if err != nil { + klog.Errorf("Failed to create cluster port group: %v", err) + return err + } + } - // Create a cluster-wide port group with all node-to-cluster router - // logical switch ports. Currently the only user is multicast but it might - // be used for other features in the future. - pg = libovsdbops.BuildPortGroup(types.ClusterRtrPortGroupName, types.ClusterRtrPortGroupName, nil, nil) - err = libovsdbops.CreateOrUpdatePortGroups(oc.nbClient, pg) - if err != nil { - klog.Errorf("Failed to create cluster port group: %v", err) + pg = &nbdb.PortGroup{ + Name: types.ClusterRtrPortGroupName, + } + pg, err = libovsdbops.GetPortGroup(oc.nbClient, pg) + if err != nil && err != libovsdbclient.ErrNotFound { return err } + if pg == nil { + // we didn't find an existing clusterRtrPG, let's create a new empty PG (fresh cluster install) + // Create a cluster-wide port group with all node-to-cluster router + // logical switch ports. Currently the only user is multicast but it might + // be used for other features in the future. + pg = libovsdbops.BuildPortGroup(types.ClusterRtrPortGroupName, types.ClusterRtrPortGroupName, nil, nil) + err = libovsdbops.CreateOrUpdatePortGroups(oc.nbClient, pg) + if err != nil { + klog.Errorf("Failed to create cluster port group: %v", err) + return err + } + } // If supported, enable IGMP relay on the router to forward multicast // traffic between nodes.