From 809ad11181db3b12a9047719cbbdb173acb6a752 Mon Sep 17 00:00:00 2001 From: Dan Winship Date: Mon, 19 Jan 2026 15:11:45 -0500 Subject: [PATCH 01/59] Use `nft destroy` to simplify the UDN cleanup code Signed-off-by: Dan Winship --- go-controller/pkg/node/gateway_shared_intf.go | 30 ++----------------- go-controller/pkg/node/nftables/helpers.go | 2 +- go-controller/pkg/node/nftables/util.go | 5 +--- 3 files changed, 5 insertions(+), 32 deletions(-) diff --git a/go-controller/pkg/node/gateway_shared_intf.go b/go-controller/pkg/node/gateway_shared_intf.go index ea09c43bb2..dcd478ef15 100644 --- a/go-controller/pkg/node/gateway_shared_intf.go +++ b/go-controller/pkg/node/gateway_shared_intf.go @@ -827,6 +827,9 @@ func delServiceRules(service *corev1.Service, localEndpoints util.PortToLBEndpoi } nftElems := getGatewayNFTRules(service, localEndpoints, true) nftElems = append(nftElems, getGatewayNFTRules(service, localEndpoints, false)...) + if util.IsNetworkSegmentationSupportEnabled() { + nftElems = append(nftElems, getUDNNFTRules(service, nil)...) + } if len(nftElems) > 0 { if err := nodenft.DeleteNFTElements(nftElems); err != nil { err = fmt.Errorf("failed to delete nftables rules for service %s/%s: %v", @@ -834,33 +837,6 @@ func delServiceRules(service *corev1.Service, localEndpoints util.PortToLBEndpoi errors = append(errors, err) } } - - if util.IsNetworkSegmentationSupportEnabled() { - // NOTE: The code below is not using nodenft.DeleteNFTElements because it first adds elements - // before removing them, which fails for UDN NFT rules. These rules only have map keys, - // not key-value pairs, making it impossible to add. - // Attempt to delete the elements directly and handle the IsNotFound error. - // - // TODO: Switch to `nft destroy` when supported. - nftElems = getUDNNFTRules(service, nil) - if len(nftElems) > 0 { - nft, err := nodenft.GetNFTablesHelper() - if err != nil { - return utilerrors.Join(append(errors, err)...) 
- } - - tx := nft.NewTransaction() - for _, elem := range nftElems { - tx.Delete(elem) - } - - if err := nft.Run(context.TODO(), tx); err != nil && !knftables.IsNotFound(err) { - err = fmt.Errorf("failed to delete nftables rules for UDN service %s/%s: %v", - service.Namespace, service.Name, err) - errors = append(errors, err) - } - } - } } return utilerrors.Join(errors...) diff --git a/go-controller/pkg/node/nftables/helpers.go b/go-controller/pkg/node/nftables/helpers.go index 3e8ed11ff4..07873378d6 100644 --- a/go-controller/pkg/node/nftables/helpers.go +++ b/go-controller/pkg/node/nftables/helpers.go @@ -28,7 +28,7 @@ func SetFakeNFTablesHelper() *knftables.Fake { // called, it will create a "real" knftables.Interface func GetNFTablesHelper() (knftables.Interface, error) { if nftHelper == nil { - nft, err := knftables.New(knftables.InetFamily, OVNKubernetesNFTablesName) + nft, err := knftables.New(knftables.InetFamily, OVNKubernetesNFTablesName, knftables.RequireDestroy) if err != nil { return nil, err } diff --git a/go-controller/pkg/node/nftables/util.go b/go-controller/pkg/node/nftables/util.go index 1a4a3bdd21..ce14186e9f 100644 --- a/go-controller/pkg/node/nftables/util.go +++ b/go-controller/pkg/node/nftables/util.go @@ -34,10 +34,7 @@ func DeleteNFTElements(elements []*knftables.Element) error { tx := nft.NewTransaction() for _, elem := range elements { - // We add+delete the elements, rather than just deleting them, so that if - // they weren't already in the set/map, we won't get an error on delete. - tx.Add(elem) - tx.Delete(elem) + tx.Destroy(elem) } return nft.Run(context.TODO(), tx) } From 1d5e616ba0d8a20e17f91fa0da3fe26b03aa359c Mon Sep 17 00:00:00 2001 From: Dan Winship Date: Thu, 5 Dec 2024 09:23:34 -0500 Subject: [PATCH 02/59] Improve nodenft.MatchNFTRules behavior Ignore whitespace differences. Sort the output back into the "correct" order. 
Signed-off-by: Dan Winship --- go-controller/pkg/node/nftables/testing.go | 60 ++++++++++--- .../pkg/node/nftables/testing_test.go | 86 +++++++++++++++++++ 2 files changed, 133 insertions(+), 13 deletions(-) create mode 100644 go-controller/pkg/node/nftables/testing_test.go diff --git a/go-controller/pkg/node/nftables/testing.go b/go-controller/pkg/node/nftables/testing.go index ad377caeca..ba42069e25 100644 --- a/go-controller/pkg/node/nftables/testing.go +++ b/go-controller/pkg/node/nftables/testing.go @@ -5,34 +5,68 @@ package nftables import ( "fmt" + "slices" + "sort" "strings" "k8s.io/apimachinery/pkg/util/sets" ) // MatchNFTRules checks that the expected nftables rules match the actual ones, ignoring -// order. +// order and extra whitespace. func MatchNFTRules(expected, actual string) error { - expectedSet := sets.New(strings.Split(expected, "\n")...) - actualSet := sets.New(strings.Split(actual, "\n")...) - - // ignore blank lines - expectedSet.Delete("") - actualSet.Delete("") - - missing := expectedSet.Difference(actualSet) - extra := actualSet.Difference(expectedSet) - + missing, extra := diffNFTRules(expected, actual) if len(missing) == 0 && len(extra) == 0 { return nil } msg := "nftables rule mismatch:" if len(missing) > 0 { - msg += fmt.Sprintf("\nMissing rules: %v\n", missing.UnsortedList()) + msg += fmt.Sprintf("\nRules missing from `nft dump ruleset`:\n%s\n", strings.Join(missing, "\n")) } if len(extra) > 0 { - msg += fmt.Sprintf("\nExtra rules: %v\n", extra.UnsortedList()) + msg += fmt.Sprintf("\nUnexpected extra rules in `nft dump ruleset`:\n%s\n", strings.Join(extra, "\n")) } return fmt.Errorf("%s", msg) } + +// helper function, for ease of unit testing +func diffNFTRules(expected, actual string) (missing, extra []string) { + expectedLines := strings.Split(expected, "\n") + expectedSet := sets.New[string]() + for _, line := range expectedLines { + line = strings.TrimSpace(line) + if line != "" { + expectedSet.Insert(line) + } + } + + 
actualLines := strings.Split(actual, "\n") + actualSet := sets.New[string]() + for _, line := range actualLines { + line = strings.TrimSpace(line) + if line != "" { + actualSet.Insert(line) + } + } + + missingSet := expectedSet.Difference(actualSet) + extraSet := actualSet.Difference(expectedSet) + + // While we ignore order for purposes of the comparison, it's confusing to output + // the missing/extra rules in essentially random order (and makes it harder to see + // what the problem is in cases like "the rules are basically correct, except that + // they have the wrong IP"). So we sort the `missing` rules back into the same + // order as they appeared in `expected`, and the `extra` rules into the same order + // as they appeared in `actual`. + missingSorted := missingSet.UnsortedList() + sort.Slice(missingSorted, func(i, j int) bool { + return slices.Index(expectedLines, missingSorted[i]) < slices.Index(expectedLines, missingSorted[j]) + }) + extraSorted := extraSet.UnsortedList() + sort.Slice(extraSorted, func(i, j int) bool { + return slices.Index(actualLines, extraSorted[i]) < slices.Index(actualLines, extraSorted[j]) + }) + + return missingSorted, extraSorted +} diff --git a/go-controller/pkg/node/nftables/testing_test.go b/go-controller/pkg/node/nftables/testing_test.go new file mode 100644 index 0000000000..d0ce907d23 --- /dev/null +++ b/go-controller/pkg/node/nftables/testing_test.go @@ -0,0 +1,86 @@ +//go:build linux +// +build linux + +package nftables + +import ( + "reflect" + "testing" +) + +func Test_diffNFTRules(t *testing.T) { + for _, tc := range []struct { + name string + expected string + actual string + missing []string + extra []string + }{ + { + name: "empty match", + expected: "", + actual: "", + missing: []string{}, + extra: []string{}, + }, + { + name: "non-empty match", + expected: "line one\nline two\nline three\n", + actual: "line three\nline one\nline two\n", + missing: []string{}, + extra: []string{}, + }, + { + name: "match with 
extra whitespace", + expected: " line one\n line two\n line three\n", + actual: "\nline three\nline one\nline two\n\n", + missing: []string{}, + extra: []string{}, + }, + { + name: "missing lines", + expected: "line one\nline two\nline three\nline four\n", + actual: "line two\nline four\n", + missing: []string{"line one", "line three"}, + extra: []string{}, + }, + { + name: "missing lines, alternate order", + expected: "line one\nline two\nline three\nline four\n", + actual: "line four\nline two\n", + missing: []string{"line one", "line three"}, + extra: []string{}, + }, + { + name: "extra lines", + expected: "line two\nline four\n", + actual: "line one\nline two\nline three\nline four\n", + missing: []string{}, + extra: []string{"line one", "line three"}, + }, + { + name: "extra lines, alternate order", + expected: "line four\nline two\n", + actual: "line one\nline two\nline three\nline four\n", + missing: []string{}, + extra: []string{"line one", "line three"}, + }, + { + name: "missing and extra lines, inconsistent whitespace", + expected: " line one\n line two\n line three\n", + actual: " line two\n line two-and-a-half\nline three", + missing: []string{"line one"}, + extra: []string{"line two-and-a-half"}, + }, + } { + t.Run(tc.name, func(t *testing.T) { + missing, extra := diffNFTRules(tc.expected, tc.actual) + if !reflect.DeepEqual(tc.missing, missing) { + t.Errorf("expected missing=%#v, got %#v", tc.missing, missing) + } + if !reflect.DeepEqual(tc.extra, extra) { + t.Errorf("expected extra=%#v, got %#v", tc.extra, extra) + } + }) + } +} From 0974f2e82a9a33164357ade3a932592b799ce6c8 Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Tue, 27 Jan 2026 02:54:10 -0800 Subject: [PATCH 03/59] Unify the metrics servers used by ovnkube-node and OVS/OVN metrics Replace the custom HTTP server in StartMetricsServer with MetricServer. 
Signed-off-by: Lei Huang --- go-controller/pkg/metrics/metrics.go | 95 +++++------------------- go-controller/pkg/metrics/ovn.go | 2 +- go-controller/pkg/metrics/ovn_db.go | 2 +- go-controller/pkg/metrics/server.go | 61 +++++++++++---- go-controller/pkg/metrics/server_test.go | 66 +++++++++++++++- 5 files changed, 129 insertions(+), 97 deletions(-) diff --git a/go-controller/pkg/metrics/metrics.go b/go-controller/pkg/metrics/metrics.go index 89b0fa896f..704ae11955 100644 --- a/go-controller/pkg/metrics/metrics.go +++ b/go-controller/pkg/metrics/metrics.go @@ -2,11 +2,9 @@ package metrics import ( "context" - "crypto/tls" "fmt" "io" "net/http" - "net/http/pprof" "os" "path" "regexp" @@ -16,11 +14,8 @@ import ( "time" "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promhttp" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - utilruntime "k8s.io/apimachinery/pkg/util/runtime" - utilwait "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" "k8s.io/klog/v2" @@ -417,26 +412,6 @@ func CheckPodRunsOnGivenNode(clientset kubernetes.Interface, labels []string, k8 strings.Join(labels, ","), k8sNodeName) } -// using the cyrpto/tls module's GetCertificate() callback function helps in picking up -// the latest certificate (due to cert rotation on cert expiry) -func getTLSServer(addr, certFile, privKeyFile string, handler http.Handler) *http.Server { - tlsConfig := &tls.Config{ - GetCertificate: func(_ *tls.ClientHelloInfo) (*tls.Certificate, error) { - cert, err := tls.LoadX509KeyPair(certFile, privKeyFile) - if err != nil { - return nil, fmt.Errorf("error generating x509 certs for metrics TLS endpoint: %v", err) - } - return &cert, nil - }, - } - server := &http.Server{ - Addr: addr, - Handler: handler, - TLSConfig: tlsConfig, - } - return server -} - // stringFlagSetterFunc is a func used for setting string type flag. 
type stringFlagSetterFunc func(string) (string, error) @@ -482,25 +457,28 @@ func writePlainText(statusCode int, text string, w http.ResponseWriter) { fmt.Fprintln(w, text) } -// StartMetricsServer runs the prometheus listener so that OVN K8s metrics can be collected -// It puts the endpoint behind TLS if certFile and keyFile are defined. +// StartMetricsServer runs the prometheus listener so that OVN K8s metrics can be collected. +// It now reuses the unified MetricServer implementation so it can share plumbing with the +// OVN/OVS metrics server. TLS and pprof behaviour remain unchanged. func StartMetricsServer(bindAddress string, enablePprof bool, certFile string, keyFile string, stopChan <-chan struct{}, wg *sync.WaitGroup) { - mux := http.NewServeMux() - mux.Handle("/metrics", promhttp.Handler()) - - if enablePprof { - mux.HandleFunc("/debug/pprof/", pprof.Index) - mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline) - mux.HandleFunc("/debug/pprof/profile", pprof.Profile) - mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol) - mux.HandleFunc("/debug/pprof/trace", pprof.Trace) - - // Allow changes to log level at runtime - mux.HandleFunc("/debug/flags/v", stringFlagPutHandler(klogSetter)) + opts := MetricServerOptions{ + BindAddress: bindAddress, + CertFile: certFile, + KeyFile: keyFile, + EnablePprof: enablePprof, + // Use default registry/gatherer so existing metric registrations keep working. 
+ Registerer: prometheus.DefaultRegisterer, + Gatherer: prometheus.DefaultGatherer, } - startMetricsServer(bindAddress, certFile, keyFile, mux, stopChan, wg) + server := NewMetricServer(opts, nil, nil) + + wg.Add(1) + go func() { + defer wg.Done() + server.Run(stopChan) + }() } // StartOVNMetricsServer runs the prometheus listener so that OVN metrics can be collected @@ -522,40 +500,3 @@ func StartOVNMetricsServer(opts MetricServerOptions, return metricsServer } - -func startMetricsServer(bindAddress, certFile, keyFile string, handler http.Handler, stopChan <-chan struct{}, wg *sync.WaitGroup) { - var server *http.Server - wg.Add(1) - go func() { - defer wg.Done() - utilwait.Until(func() { - klog.Infof("Starting metrics server at address %q", bindAddress) - var listenAndServe func() error - if certFile != "" && keyFile != "" { - server = getTLSServer(bindAddress, certFile, keyFile, handler) - listenAndServe = func() error { return server.ListenAndServeTLS("", "") } - } else { - server = &http.Server{Addr: bindAddress, Handler: handler} - listenAndServe = func() error { return server.ListenAndServe() } - } - - errCh := make(chan error) - go func() { - errCh <- listenAndServe() - }() - var err error - select { - case err = <-errCh: - err = fmt.Errorf("failed while running metrics server at address %q: %w", bindAddress, err) - utilruntime.HandleError(err) - case <-stopChan: - klog.Infof("Stopping metrics server at address %q", bindAddress) - shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - if err := server.Shutdown(shutdownCtx); err != nil { - klog.Errorf("Error stopping metrics server at address %q: %v", bindAddress, err) - } - } - }, 5*time.Second, stopChan) - }() -} diff --git a/go-controller/pkg/metrics/ovn.go b/go-controller/pkg/metrics/ovn.go index a243aec0ee..cb266f2026 100644 --- a/go-controller/pkg/metrics/ovn.go +++ b/go-controller/pkg/metrics/ovn.go @@ -364,7 +364,7 @@ func 
updateSBDBConnectionMetric(ovsAppctl ovsClient) { } // RegisterOvnControllerMetrics registers the ovn-controller metrics -func RegisterOvnControllerMetrics(ovsDBClient libovsdbclient.Client, ovnRegistry *prometheus.Registry) { +func RegisterOvnControllerMetrics(ovsDBClient libovsdbclient.Client, ovnRegistry prometheus.Registerer) { getOvnControllerVersionInfo() ovnRegistry.MustRegister(prometheus.NewGaugeFunc( prometheus.GaugeOpts{ diff --git a/go-controller/pkg/metrics/ovn_db.go b/go-controller/pkg/metrics/ovn_db.go index e42fa1be3f..7a9cfdd0f7 100644 --- a/go-controller/pkg/metrics/ovn_db.go +++ b/go-controller/pkg/metrics/ovn_db.go @@ -359,7 +359,7 @@ func getOvnDbVersionInfo() { } } -func RegisterOvnDBMetrics(ovnRegistry *prometheus.Registry) ([]*util.OvsDbProperties, bool, bool) { +func RegisterOvnDBMetrics(ovnRegistry prometheus.Registerer) ([]*util.OvsDbProperties, bool, bool) { // get the ovsdb server version info getOvnDbVersionInfo() // register metrics that will be served off of /metrics path diff --git a/go-controller/pkg/metrics/server.go b/go-controller/pkg/metrics/server.go index 88d04ce2a4..2641cb4ec7 100644 --- a/go-controller/pkg/metrics/server.go +++ b/go-controller/pkg/metrics/server.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "net/http" + "net/http/pprof" "time" "github.com/prometheus/client_golang/prometheus" @@ -35,11 +36,16 @@ type MetricServerOptions struct { EnableOVNDBMetrics bool EnableOVNControllerMetrics bool EnableOVNNorthdMetrics bool + EnablePprof bool // OnFatalError is called when an unrecoverable error occurs (e.g., failed to bind to address). // If set, it allows the caller to trigger a graceful shutdown. 
OnFatalError func() + // Prometheus plumbing + Registerer prometheus.Registerer + Gatherer prometheus.Gatherer + // Kubernetes integration K8sClient kubernetes.Interface K8sNodeName string @@ -63,35 +69,59 @@ type MetricServer struct { server *http.Server mux *http.ServeMux - // Prometheus registries - ovnRegistry *prometheus.Registry + // Prometheus registry / gatherer + registerer prometheus.Registerer + gatherer prometheus.Gatherer } // NewMetricServer creates a new MetricServer instance func NewMetricServer(opts MetricServerOptions, ovsDBClient libovsdbclient.Client, kubeClient kubernetes.Interface) *MetricServer { + registerer := opts.Registerer + if registerer == nil { + registerer = prometheus.NewRegistry() + } + gatherer := opts.Gatherer + if gatherer == nil { + if reg, ok := registerer.(prometheus.Gatherer); ok { + gatherer = reg + } else { + gatherer = prometheus.DefaultGatherer + } + } + // Create server instance server := &MetricServer{ opts: opts, ovsDBClient: ovsDBClient, - ovnRegistry: prometheus.NewRegistry(), + registerer: registerer, + gatherer: gatherer, kubeClient: kubeClient, } server.mux = http.NewServeMux() - metricsHandler := promhttp.HandlerForTransactional( - prometheus.ToTransactionalGatherer(server.ovnRegistry), - promhttp.HandlerOpts{}, - ) + tg := prometheus.ToTransactionalGatherer(server.gatherer) + metricsHandler := promhttp.HandlerForTransactional(tg, promhttp.HandlerOpts{}) + server.mux.Handle("/metrics", promhttp.InstrumentMetricHandler( - server.ovnRegistry, + server.registerer, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { // Update metrics in the registry before emitting them. server.handleMetrics(r) - // Emit the updated metrics using the transactional handler. 
metricsHandler.ServeHTTP(w, r) }), )) + if opts.EnablePprof { + server.mux.HandleFunc("/debug/pprof/", pprof.Index) + server.mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline) + server.mux.HandleFunc("/debug/pprof/profile", pprof.Profile) + server.mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol) + server.mux.HandleFunc("/debug/pprof/trace", pprof.Trace) + + // Allow changes to log level at runtime + server.mux.HandleFunc("/debug/flags/v", stringFlagPutHandler(klogSetter)) + } + return server } @@ -99,32 +129,32 @@ func NewMetricServer(opts MetricServerOptions, ovsDBClient libovsdbclient.Client func (s *MetricServer) registerMetrics() { if s.opts.EnableOVSMetrics { klog.Infof("MetricServer registers OVS metrics") - registerOvsMetrics(s.ovsDBClient, s.ovnRegistry) + registerOvsMetrics(s.ovsDBClient, s.registerer) } if s.opts.EnableOVNDBMetrics { klog.Infof("MetricServer registers OVN DB metrics") - s.ovsDbProperties, s.opts.dbIsClustered, s.opts.dbFoundViaPath = RegisterOvnDBMetrics(s.ovnRegistry) + s.ovsDbProperties, s.opts.dbIsClustered, s.opts.dbFoundViaPath = RegisterOvnDBMetrics(s.registerer) } if s.opts.EnableOVNControllerMetrics { klog.Infof("MetricServer registers OVN Controller metrics") - RegisterOvnControllerMetrics(s.ovsDBClient, s.ovnRegistry) + RegisterOvnControllerMetrics(s.ovsDBClient, s.registerer) } if s.opts.EnableOVNNorthdMetrics { klog.Infof("MetricServer registers OVN Northd metrics") - RegisterOvnNorthdMetrics(s.ovnRegistry) + RegisterOvnNorthdMetrics(s.registerer) } } func (s *MetricServer) EnableOVNNorthdMetrics() { s.opts.EnableOVNNorthdMetrics = true klog.Infof("MetricServer registers OVN Northd metrics") - RegisterOvnNorthdMetrics(s.ovnRegistry) + RegisterOvnNorthdMetrics(s.registerer) } func (s *MetricServer) EnableOVNDBMetrics() { s.opts.EnableOVNDBMetrics = true klog.Infof("MetricServer registers OVN DB metrics") - s.ovsDbProperties, s.opts.dbIsClustered, s.opts.dbFoundViaPath = RegisterOvnDBMetrics(s.ovnRegistry) + 
s.ovsDbProperties, s.opts.dbIsClustered, s.opts.dbFoundViaPath = RegisterOvnDBMetrics(s.registerer) } // updateOvsMetrics updates the OVS metrics @@ -226,6 +256,7 @@ func (s *MetricServer) Run(stopChan <-chan struct{}) { errCh := make(chan error) go func() { + klog.Infof("Metric Server starts to listen on %s", s.opts.BindAddress) errCh <- listenAndServe() }() diff --git a/go-controller/pkg/metrics/server_test.go b/go-controller/pkg/metrics/server_test.go index 32d4144dd8..98e568b80f 100644 --- a/go-controller/pkg/metrics/server_test.go +++ b/go-controller/pkg/metrics/server_test.go @@ -13,6 +13,7 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" + "github.com/prometheus/client_golang/prometheus" "github.com/spf13/afero" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" @@ -48,7 +49,8 @@ func TestNewMetricServerRunAndShutdown(t *testing.T) { server := NewMetricServer(opts, ovsDBClient, kubeClient) require.NotNil(t, server, "Server should not be nil") require.NotNil(t, server.mux, "Server mux should not be nil") - require.NotNil(t, server.ovnRegistry, "Server OVN registry should not be nil") + require.NotNil(t, server.registerer, "Server registerer should not be nil") + require.NotNil(t, server.gatherer, "Server gatherer should not be nil") // Start server in background serverDone := make(chan struct{}) @@ -109,7 +111,8 @@ func TestNewMetricServerRunAndFailOnFatalError(t *testing.T) { server := NewMetricServer(opts, ovsDBClient, kubeClient) require.NotNil(t, server, "Server should not be nil") require.NotNil(t, server.mux, "Server mux should not be nil") - require.NotNil(t, server.ovnRegistry, "Server OVN registry should not be nil") + require.NotNil(t, server.registerer, "Server registerer should not be nil") + require.NotNil(t, server.gatherer, "Server gatherer should not be nil") // Start server in background serverDone := make(chan struct{}) @@ -316,6 +319,7 @@ type metricsTestCase struct { enableOVNDB bool 
enableOVNController bool enableOVNNorthd bool + registerer prometheus.Registerer mockRunCommands []ovntest.TestifyMockHelper expectedMetrics []string } @@ -379,6 +383,11 @@ func TestHandleMetrics(t *testing.T) { } defer libovsdbCleanup.Cleanup() + // Register OVN-Kube controller base metrics into the default registry, so the + // metrics in default registry can be tested. + RegisterOVNKubeControllerBase() + MetricOVNKubeControllerSyncDuration.WithLabelValues("pods").Set(0) + testCases := []metricsTestCase{ { name: "OVS metrics", @@ -778,6 +787,56 @@ func TestHandleMetrics(t *testing.T) { "promhttp_metric_handler_requests_total", }, }, + { + name: "default registry metrics", + registerer: prometheus.DefaultRegisterer, + expectedMetrics: []string{ + "ovnkube_controller_leader", + "ovnkube_controller_ready_duration_seconds", + "ovnkube_controller_sync_duration_seconds", + "ovnkube_controller_build_info", + "go_gc_duration_seconds", + "go_gc_gogc_percent", + "go_gc_gomemlimit_bytes", + "go_goroutines", + "go_info", + "go_memstats_alloc_bytes", + "go_memstats_alloc_bytes_total", + "go_memstats_buck_hash_sys_bytes", + "go_memstats_frees_total", + "go_memstats_gc_sys_bytes", + "go_memstats_heap_alloc_bytes", + "go_memstats_heap_idle_bytes", + "go_memstats_heap_inuse_bytes", + "go_memstats_heap_objects", + "go_memstats_heap_released_bytes", + "go_memstats_heap_sys_bytes", + "go_memstats_last_gc_time_seconds", + "go_memstats_mallocs_total", + "go_memstats_mcache_inuse_bytes", + "go_memstats_mcache_sys_bytes", + "go_memstats_mspan_inuse_bytes", + "go_memstats_mspan_sys_bytes", + "go_memstats_next_gc_bytes", + "go_memstats_other_sys_bytes", + "go_memstats_stack_inuse_bytes", + "go_memstats_stack_sys_bytes", + "go_memstats_sys_bytes", + "go_sched_gomaxprocs_threads", + "go_threads", + "process_cpu_seconds_total", + "process_max_fds", + "process_network_receive_bytes_total", + "process_network_transmit_bytes_total", + "process_open_fds", + "process_resident_memory_bytes", + 
"process_start_time_seconds", + "process_virtual_memory_bytes", + "process_virtual_memory_max_bytes", + "promhttp_metric_handler_requests_in_flight", + "promhttp_metric_handler_requests_total", + }, + }, } for _, tc := range testCases { @@ -789,6 +848,7 @@ func TestHandleMetrics(t *testing.T) { EnableOVNDBMetrics: tc.enableOVNDB, EnableOVNControllerMetrics: tc.enableOVNController, EnableOVNNorthdMetrics: tc.enableOVNNorthd, + Registerer: tc.registerer, } // Mock the exec runner for RunOvsVswitchdAppCtl calls mockCmd := new(mock_k8s_io_utils_exec.Cmd) @@ -814,7 +874,7 @@ func TestHandleMetrics(t *testing.T) { server.registerMetrics() // iterate s.ovnRegistry to list all registered metrics' names - regMetrics, err := server.ovnRegistry.Gather() + regMetrics, err := server.gatherer.Gather() if err != nil { t.Fatalf("Failed to gather metrics: %v", err) } From 6ddb8b763303f86123353cd7d8d24b4655ed5361 Mon Sep 17 00:00:00 2001 From: Yury Kulazhenkov Date: Tue, 10 Feb 2026 12:27:31 +0000 Subject: [PATCH 04/59] Handle stale PCI address in mgmt port Init for DPU Host case A DPU firmware settings change can cause the same physical port to be re-enumerated under a different PCI address after a host reboot. Previously, Init() only handled missing device IDs (legacy annotations). Now it also detects when the annotated device ID is no longer present in the allocator and falls back to matching by PfId and FuncId. 
Signed-off-by: Yury Kulazhenkov --- .../node/managementport/portDeviceManager.go | 19 +- .../managementport/portDeviceManager_test.go | 201 ++++++++++++++++++ 2 files changed, 217 insertions(+), 3 deletions(-) create mode 100644 go-controller/pkg/node/managementport/portDeviceManager_test.go diff --git a/go-controller/pkg/node/managementport/portDeviceManager.go b/go-controller/pkg/node/managementport/portDeviceManager.go index 5447836298..b4e7d2f593 100644 --- a/go-controller/pkg/node/managementport/portDeviceManager.go +++ b/go-controller/pkg/node/managementport/portDeviceManager.go @@ -2,6 +2,7 @@ package managementport import ( "fmt" + "slices" "sync" "k8s.io/klog/v2" @@ -60,10 +61,22 @@ func (mpdm *MgmtPortDeviceManager) Init() error { // validate the existing management port reservations: for network, annotatedMgmtPortDetails := range annotatedMgmtPortDetailsMap { deviceId := annotatedMgmtPortDetails.DeviceId + allDeviceIDs := mpdm.deviceAllocator.DeviceIDs() + if deviceId != "" && !slices.Contains(allDeviceIDs, deviceId) { + // The device ID from the annotation is no longer available in the + // resource pool. This can happen if the management port is + // re-enumerated at a different PCI address after a host reboot (for + // example, due to changes in DPU firmware settings). + klog.V(5).Infof("Manage port device %s of resource %s for network %s is no longer available, "+ + "ignore DeviceID value from the annotation", deviceId, mpdm.deviceAllocator.ResourceName(), network) + deviceId = "" + } if deviceId == "" { - // this must be legacyManagementPortDetails annotation for default network, try to find its deviceId. - // luckily this is one time thing - allDeviceIDs := mpdm.deviceAllocator.DeviceIDs() + // The device ID may be missing from the annotation (legacy default + // network annotation) or stale (not present in the resource pool). 
+ // In either case, look up the device by PfId and FuncId, assuming + // the device plugin still exposes the same port and that we should + // consume the same VF index. for _, d := range allDeviceIDs { mgmtDetails, err := util.GetNetworkDeviceDetails(d) if err == nil && mgmtDetails.PfId == annotatedMgmtPortDetails.PfId && mgmtDetails.FuncId == annotatedMgmtPortDetails.FuncId { diff --git a/go-controller/pkg/node/managementport/portDeviceManager_test.go b/go-controller/pkg/node/managementport/portDeviceManager_test.go new file mode 100644 index 0000000000..79428672da --- /dev/null +++ b/go-controller/pkg/node/managementport/portDeviceManager_test.go @@ -0,0 +1,201 @@ +package managementport + +import ( + "encoding/json" + "os" + "strings" + + "github.com/stretchr/testify/mock" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/fake" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/deviceresource" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" + kubeMocks "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube/mocks" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + utilMocks "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/mocks" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +const testNodeName = "test-node" + +// envVarForResource returns the device-plugin environment variable +// name that corresponds to the given resource name. +func envVarForResource(resourceName string) string { + s := strings.ReplaceAll(resourceName, ".", "_") + s = strings.ReplaceAll(s, "/", "_") + return "PCIDEVICE_" + strings.ToUpper(s) +} + +// setupInitTestEnv creates a DeviceResourceAllocator backed by the +// given PCI IDs list and a NodeWatchFactory whose fake client +// contains a single node with the provided management-port annotation. 
+// If annotation is nil the node is created without the annotation. +func setupInitTestEnv( + resourceName string, availableDevices []string, + annotation util.NetworkDeviceDetailsMap, +) (*deviceresource.DeviceResourceAllocator, factory.NodeWatchFactory) { + envVarName := envVarForResource(resourceName) + os.Setenv(envVarName, strings.Join(availableDevices, ",")) + DeferCleanup(os.Unsetenv, envVarName) + + allocator, err := deviceresource.DeviceResourceManager().GetDeviceResourceAllocator(resourceName) + Expect(err).NotTo(HaveOccurred()) + + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: testNodeName, + Annotations: map[string]string{}, + }, + } + if annotation != nil { + annotationBytes, err := json.Marshal(annotation) + Expect(err).NotTo(HaveOccurred()) + node.Annotations[util.OvnNodeManagementPort] = string(annotationBytes) + } + + fakeClient := fake.NewSimpleClientset( + &corev1.NodeList{Items: []corev1.Node{*node}}) + fakeNodeClient := &util.OVNNodeClientset{KubeClient: fakeClient} + wf, err := factory.NewNodeWatchFactory(fakeNodeClient, testNodeName) + Expect(err).NotTo(HaveOccurred()) + Expect(wf.Start()).To(Succeed()) + + return allocator, wf +} + +// mockDeviceDetails sets up SriovnetOps mock expectations so that +// util.GetNetworkDeviceDetails(deviceId) returns the given pfId and +// funcId. 
+func mockDeviceDetails( + sriovMock *utilMocks.SriovnetOps, + deviceId string, pfId, funcId int, +) { + sriovMock.On("GetVfIndexByPciAddress", deviceId).Return(funcId, nil) + sriovMock.On("GetPfIndexByVfPciAddress", deviceId).Return(pfId, nil) +} + +var _ = Describe("MgmtPortDeviceManager tests", func() { + var ( + sriovnetOpsMock *utilMocks.SriovnetOps + origSriovnetOps util.SriovnetOps + kubeMock *kubeMocks.Interface + ) + BeforeEach(func() { + origSriovnetOps = util.GetSriovnetOps() + Expect(config.PrepareTestConfig()).To(Succeed()) + sriovnetOpsMock = &utilMocks.SriovnetOps{} + util.SetSriovnetOpsInst(sriovnetOpsMock) + kubeMock = &kubeMocks.Interface{} + }) + + AfterEach(func() { + util.SetSriovnetOpsInst(origSriovnetOps) + sriovnetOpsMock.AssertExpectations(GinkgoT()) + kubeMock.AssertExpectations(GinkgoT()) + }) + + Context("Init", func() { + It("Succeeds with no management port annotation", func() { + allocator, wf := setupInitTestEnv( + "example.com/pool_no_annotation", + []string{"0000:05:00.0"}, + nil, + ) + DeferCleanup(wf.Shutdown) + mpdm := NewMgmtPortDeviceManager(kubeMock, wf, testNodeName, allocator) + Expect(mpdm.Init()).NotTo(HaveOccurred()) + Expect(mpdm.mgmtPortDetails).To(BeEmpty()) + }) + It("Restores valid DeviceId with matching PfId/FuncId", func() { + const ( + device0 = "0000:03:00.0" + device1 = "0000:03:00.1" + ) + allocator, wf := setupInitTestEnv( + "example.com/pool_valid_restore", + []string{device0, device1}, + util.NetworkDeviceDetailsMap{ + "default": {DeviceId: device0, PfId: 0, FuncId: 4}, + }, + ) + DeferCleanup(wf.Shutdown) + mockDeviceDetails(sriovnetOpsMock, device0, 0, 4) + mpdm := NewMgmtPortDeviceManager(kubeMock, wf, testNodeName, allocator) + Expect(mpdm.Init()).NotTo(HaveOccurred()) + Expect(mpdm.mgmtPortDetails["default"].DeviceId).To(Equal(device0)) + }) + It("Restores legacy annotation without DeviceId by PfId/FuncId match", func() { + const ( + device0 = "0000:04:00.0" + device1 = "0000:04:00.1" + ) + allocator, 
wf := setupInitTestEnv( + "example.com/pool_legacy_restore", + []string{device0, device1}, + util.NetworkDeviceDetailsMap{"default": {PfId: 0, FuncId: 4}}, + ) + DeferCleanup(wf.Shutdown) + mockDeviceDetails(sriovnetOpsMock, device0, 0, 4) + + kubeMock.On("SetAnnotationsOnNode", testNodeName, + mock.Anything).Return(nil).Once() + + mpdm := NewMgmtPortDeviceManager(kubeMock, wf, testNodeName, allocator) + Expect(mpdm.Init()).NotTo(HaveOccurred()) + Expect(mpdm.mgmtPortDetails["default"].DeviceId). + To(Equal(device0)) + }) + It("Recovers by PfId/FuncId when annotated DeviceId is stale", func() { + const ( + staleDevice = "0000:01:01.0" + matchDevice = "0000:01:00.0" + otherDevice = "0000:01:00.1" + ) + allocator, wf := setupInitTestEnv( + "example.com/pool_stale_recovery", + []string{matchDevice, otherDevice}, + util.NetworkDeviceDetailsMap{ + "default": {DeviceId: staleDevice, PfId: 3, FuncId: 5}, + }, + ) + DeferCleanup(wf.Shutdown) + mockDeviceDetails(sriovnetOpsMock, matchDevice, 3, 5) + kubeMock.On("SetAnnotationsOnNode", testNodeName, + mock.Anything).Return(nil).Once() + + mpdm := NewMgmtPortDeviceManager(kubeMock, wf, testNodeName, allocator) + Expect(mpdm.Init()).NotTo(HaveOccurred()) + Expect(mpdm.mgmtPortDetails["default"].DeviceId). 
+ To(Equal(matchDevice)) + }) + It("Fails when no PfId/FuncId match after ignoring stale DeviceId", func() { + const ( + staleDevice = "0000:02:01.0" + device1 = "0000:02:00.0" + device2 = "0000:02:00.1" + ) + allocator, wf := setupInitTestEnv( + "example.com/pool_stale_no_match", + []string{device1, device2}, + util.NetworkDeviceDetailsMap{ + "default": {DeviceId: staleDevice, PfId: 3, FuncId: 5}, + }, + ) + DeferCleanup(wf.Shutdown) + // Neither device matches PfId=3, FuncId=5 + mockDeviceDetails(sriovnetOpsMock, device1, 1, 1) + mockDeviceDetails(sriovnetOpsMock, device2, 2, 2) + + mpdm := NewMgmtPortDeviceManager(kubeMock, wf, testNodeName, allocator) + err := mpdm.Init() + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("failed to find match manage port device")) + }) + }) +}) From e527aa8033a8ddfed8e1836fec7bc7c6e47b3997 Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Tue, 10 Feb 2026 19:20:50 -0500 Subject: [PATCH 05/59] Adds DPU blog Signed-off-by: Tim Rozet --- docs/blog/dpu-acceleration.md | 396 ++++ docs/blog/index.md | 3 +- docs/images/ovnk-accelerated.excalidraw | 1149 +++++++++++ docs/images/ovnk-accelerated.svg | 4 + docs/images/ovnk-unaccelerated.excalidraw | 2131 +++++++++++++++++++++ docs/images/ovnk-unaccelerated.svg | 4 + mkdocs.yml | 1 + 7 files changed, 3687 insertions(+), 1 deletion(-) create mode 100644 docs/blog/dpu-acceleration.md create mode 100644 docs/images/ovnk-accelerated.excalidraw create mode 100644 docs/images/ovnk-accelerated.svg create mode 100644 docs/images/ovnk-unaccelerated.excalidraw create mode 100644 docs/images/ovnk-unaccelerated.svg diff --git a/docs/blog/dpu-acceleration.md b/docs/blog/dpu-acceleration.md new file mode 100644 index 0000000000..9de7f0daac --- /dev/null +++ b/docs/blog/dpu-acceleration.md @@ -0,0 +1,396 @@ +# Accelerating and Offloading Kubernetes Networking: Leveraging DPUs with OVN-Kubernetes + +## Introduction + +This blog post provides a comprehensive guide on deploying 
OVN-Kubernetes in an environment leveraging Data Processing Units (DPUs). +This setup is crucial for high-performance networking in cloud-native applications, offloading network processing from the host CPU to the +DPU, and providing better security for the networking control-plane. Additionally, OVN-Kubernetes brings in robust features like User Defined +Networks (UDNs) that enable per tenant network isolation into the Kubernetes environment and integrates with the DPU solution. + +In this guide, **offloading** means moving OVN-Kubernetes SDN control and data plane work from the host into the DPU to free host CPU and memory +resources. Note, this is different from **OVS offloading**, where datapath processing is offloaded from kernel to hardware +ASICs inside a DPU or SmartNIC. +**Acceleration** means using hardware acceleration capabilities inside the DPU (SmartNIC/OVS offload paths) to deliver higher network +throughput and lower latency. + +## Architecture Overview + +Within a typical Kubernetes worker node, the CNI typically runs as a host networked pod alongside other processes in the host. +In an unaccelerated+non-offloaded environment, OVN-Kubernetes behaves the same way, with its stack composed of OVN-Kubernetes, +Open Virtual Network (OVN), and Open vSwitch (OVS). OVN-Kubernetes listens for KAPI events, configures a logical topology in OVN, +and then OVN translates that into OpenFlow which is programmed into the OVS datapath. Here is an overview of a typical setup: + +![Regular OVN-Kubernetes Worker Node](../images/ovnk-unaccelerated.svg) + +--- + +In this environment there are several potential issues to highlight: + +* The SDN control and data planes are consuming CPU/memory resources on the host itself. +* The SDN control and data planes are able to be compromised if an attacker breaks out of a pod into the host namespace. +* Pods are attached with veth interfaces. 
Although fast path traffic is handled within the kernel using the OVS kernel module, + there are realistic throughput and latency limitations. + +--- + +A DPU is a system on a chip that is pluggable into the server via a PCIe slot. It provides a specialized environment with +its own CPU, memory, storage and OS. It also includes a SmartNIC architecture that provides packet processing acceleration and offload capabilities. +Integrating the DPU into the OVN-Kubernetes CNI provides the ability to move the SDN control and data plane down into the DPU. +This solves the aforementioned issues by: + +* Relieving the Host of resource consumption. +* Adding security so that if a pod breaks out on the host, it cannot access/compromise the SDN control and data plane. +* Pods are accelerated with Virtual Function (VF) interfaces which combined with specialized smart NIC hardware to offload + the OVS datapath results in best in class throughput and latency. + +--- + +Here is a diagram of a DPU accelerated worker node with OVN-Kubernetes: + +![Accelerated OVN-Kubernetes Worker Node](../images/ovnk-accelerated.svg) + +--- + +In this kind of deployment the DPU is not part of the *Host Kubernetes Cluster*. This is typically referred to as an “*off-host-cluster*” +type of deployment. OVN-Kubernetes running in the DPU is typically managed as a secondary Kubernetes cluster, but it does not have to be. +The kubeconfig of the host cluster is provided to OVN-Kube running in the DPU in order to configure the SDN. For the purpose of this guide, +we will treat the DPU as if it is in its own secondary Kubernetes cluster, referred to as the *DPU Kubernetes Cluster*. +OVN-Kube on the DPU is responsible for configuring OVN and wiring up networking within the DPU, while OVN-Kube on the Host is a lightweight +container used to plug in the network interfaces for pods in the Host. 
+ +## Getting Started + +Before starting the deployment, ensure the following prerequisites are met: + +* A Host Kubernetes cluster with nodes equipped with compatible DPUs. +* A secondary DPU Kubernetes cluster that contains the DPUs. +* Kube-proxy should be disabled in both clusters. +* Familiarity with OVN-Kubernetes and general Kubernetes networking concepts. +* Ensure the versions of Kubernetes are compatible with the version of OVN-Kubernetes to be used in the cluster. An OVN-Kubernetes version of at least 1.3 Alpha (latest master as of Feb 2026) is required, which maps to Kubernetes 1.34 as of this writing. + +Note, for setting up Kubernetes clusters quickly for a test environment, see [kubernetes.io/docs/setup/production-environment/tools/kubeadm/create-cluster-kubeadm/](https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/create-cluster-kubeadm/). + +For this guide, we will focus on the setup using an NVIDIA BlueField-3 DPU, which has been tested to be +compatible with OVN-Kubernetes. +The OVN-Kubernetes DPU architecture is not specific to NVIDIA DPUs, and should be compatible with any DPU vendor. +Currently only NVIDIA BlueField DPUs have been tested and considered supported by upstream. +For the BlueField-3 Linux OS, ensure that it matches the version supported by the DOCA drivers. At the time of this writing, +the current DOCA drivers for BlueField-3 are version 3.2.1, which are compatible with Ubuntu 24.04 on the DPU. +Note, the Host Linux OS also needs DOCA software installed, but there are many different flavors and versions of Linux +supported for 3.2.1. +For the full list check out the [NVIDIA DOCA downloads page](https://developer.nvidia.com/doca-downloads?deployment_platform=Host-Server&deployment_package=DOCA-Host&target_os=Linux&Architecture=x86_64&Profile=doca-all). + +## Limitations + +A pod who's network interface is provided by the OVN-Kubernetes on the DPU is considered to be "DPU accelerated". 
+When using a DPU with OVN-Kubernetes, all OVN-Kubernetes networked pod interfaces on the Host must be DPU accelerated. +There is no support for a mix of unaccelerated and accelerated workloads within the same node. However, it is possible to +have a node with accelerated workloads with a DPU, and then another node with unaccelerated pods. +It is also possible to have a mix of some nodes with DPU and other nodes with other types of SmartNICs. + +Furthermore, it is also not currently possible in OVN-Kubernetes to use more than one SmartNIC or DPU within a node. +There are plans in future to lift these limitations. + +## OVS Offload Considerations + +OVS provides two methods of datapath hardware offload: + +* **Kernel Hardware Offload** - Using Linux Traffic Control to configure the DPU. +* **OVS DOCA** - A userspace OVS control plane that programs a fully hardware-resident datapath on the DPU using DOCA, with no kernel OVS or TC involvement. + +Either option is supported by OVN-Kubernetes, and either option may be used in this guide. + +## Setting up the DPU and Host + +The DPU must be configured to handle networking functions for the host. The following configuration for the DPU will be done on the Host itself. + +1. Install DOCA-Host drivers on Host if not already present. Note, these are required even if not using OVS DOCA in order to access the rshim interface to manage the DPU. Be sure to use at least version 3.2.1. + * Go to [developer.nvidia.com/doca-downloads](https://developer.nvidia.com/doca-downloads), in the “Select” form, choose **Host-Server** -> **DOCA-Host** -> **Linux** -> **x86_64** -> **doca-ofed** -> {*Your-OS-Distribution*} -> {*Your-OS-Version*} -> {*Preferred installation type*}, then follow the instructions displayed below the form to install the package. + + Note: Some extra packages may be required depending on your distribution. + +2. Make sure that **bfb-install** exists after the above step. +3. 
Start **rshim** by running either “systemctl enable --now rshim” or simply “rshim”.
On the Host, configure the desired number of VFs, then rename the first VF device so that it can be dedicated to and referenced by OVN-Kubernetes as the OVN-Kubernetes management port: + + ```bash + user@fedora: echo ${num_of_desired_vfs} > /sys/class/net/${interface}/devices/sriov_numvfs + user@fedora: ip link set ens1f0v0 down + user@fedora: ip link set ens1f0v0 name forOVN0 + user@fedora: ip link set forOVN0 up + ``` + +10. The BFB package installed earlier includes Open vSwitch (OVS). OVS will be installed as a systemd service, and the service is enabled by default. By default DPU will come up with 2 bridges, ovsbr1 and ovsbr2, regardless if the port is cabled or not. You may delete them and create your own bridges, just remember to add uplink **p0/p1** and x86 representor **pf0hpf/pf1hpf** to the new bridge. We will use a tool called minicom to get into the DPU from the host and configure OVS. + + ```bash + #### run minicom on x86 host to login to the DPU via rshim interface + # minicom -D /dev/rshim0/console + #### login to DPU + user@ubuntu: ovs-vsctl show + c41c98ac-0159-4874-97d5-17a4d2647d70 + Bridge ovsbr2 + Port en3f1pf1sf0 + Interface en3f1pf1sf0 + Port p1 + Interface p1 + Port pf1hpf + Interface pf1hpf + Port ovsbr2 + Interface ovsbr2 + type: internal + Bridge ovsbr1 + Port p0 + Interface p0 + Port ovsbr1 + Interface ovsbr1 + type: internal + Port pf0hpf + Interface pf0hpf + Port en3f0pf0sf0 + Interface en3f0pf0sf0 + ovs_version: "3.2.1005" + user@ubuntu: ovs-vsctl del-br ovsbr1 + user@ubuntu: ovs-vsctl del-br ovsbr2 + user@ubuntu: ovs-vsctl add-br brp0 + user@ubuntu: ovs-vsctl add-port brp0 p0 + user@ubuntu: ovs-vsctl add-port brp0 pf0hpf + ``` + +11. Now that the OVS bridge is created with the proper port configuration, we need to configure the IP address of the bridge. Typically this involves moving the IP address that was already configured on the **en3f0pf0sf0** interface to the **brp0** bridge. 
This IP address will be used for Geneve encapsulation (ovn-encap-ip), and therefore we must configure the OVS bridge so that OVN is aware of it. Additionally, take note of the default gateway route on the Host (10.1.65.1 in this example). We will need to configure this as well in the OVS bridge so OVN will use it as its default gateway. + + ```bash + #### run minicom on x86 host to login to the DPU via rshim interface + # minicom -D /dev/rshim0/console + #### login to DPU + user@ubuntu: ip addr del 10.1.65.155/24 dev en3f0pf0sf0 + user@ubuntu: ip addr add 10.1.65.155/24 dev brp0 + #### make brp0 as the default route interface + user@ubuntu: ip r add default via 10.1.65.1 dev brp0 + #### configure OVS + user@ubuntu: ovs-vsctl set Open_vSwitch . other_config:hw-offload=true + user@ubuntu: ovs-vsctl set Open_vSwitch . external_ids:ovn-encap-ip="10.1.65.155" + user@ubuntu: ovs-vsctl set Open_vSwitch . external_ids:ovn-gw-interface="brp0" + user@ubuntu: ovs-vsctl set Open_vSwitch . external_ids:ovn-gw-nexthop="10.1.65.1" + #### configure the hostname of the Host as it will appear in the Host Kubernetes Cluster + user@ubuntu: ovs-vsctl set Open_vSwitch . external_ids:host-k8s-nodename="host-worker-1" + ``` + +## Deploying OVN-Kubernetes + +A version of OVN-Kubernetes at least with 1.3 is required for DPUs. At the time of this writing, 1.3 is in Alpha state. The following steps should be done from a jumphost that has Kubeconfig access to both the Host and DPU cluster. + +1. Build or download the OVN-Kubernetes container images. Refer to this [image build guide](../developer-guide/image-build.md) on how to build/obtain the artifacts. +2. Upload the images to a container registry that is reachable by both clusters. +3. Label all Host nodes with DPU with **k8s.ovn.org/dpu-host=""** +4. Label all DPU nodes with **k8s.ovn.org/dpu=""** +5. `git clone https://github.com/ovn-kubernetes/ovn-kubernetes` to obtain the helm charts. +6. 
Follow the [upstream installation guide](../installation/launching-ovn-kubernetes-with-dpu.md) to configure the helm charts correctly and install OVN-Kubernetes to the Host and DPU. + +## Install SR-IOV Device Plugin + +OVN-Kubernetes relies on SR-IOV Plugin to provision VFs for the pods. Once allocated, OVN-Kubernetes will plug the VF for the pod on the Host into the pod network namespace. Then, on the DPU side, it will plug in the VF representor into OVS. From the jumphost follow these steps and use the kubeconfig of the Host Kubernetes cluster. + +1. `git clone https://github.com/k8snetworkplumbingwg/sriov-network-device-plugin`. Use at least tag v3.11.0. +2. Configure the SR-IOV resource that OVN-Kubernetes will use. Replace the content of `deployments/configMap.yaml` with: + + ```yaml + apiVersion: v1 + kind: ConfigMap + metadata: + name: sriovdp-config + namespace: kube-system + data: + config.json: | + { + "resourceList": [ + { + "resourceName": "asap2_vf", + "resourcePrefix": "nvidia.com", + "excludeTopology": true, + "selectors": { + "vendors": [ "15b3" ], + "devices": [ "101e" ], + "drivers": [ "mlx5_core" ], + "pfNames": [ "ens1f0np0#1-7" ] + } + } + ] + } + ``` + +3. `kubectl create -f deployments/configMap.yaml` +4. `kubectl create -f deployments/sriovdp-daemonset.yaml` + +## Install Multus + +Multus is needed in order to pass the VF allocated by SR-IOV Plugin to OVN-Kubernetes as the DeviceID. Furthermore, in addition to the primary network, OVN-Kubernetes supports Secondary Networks using Secondary Network Attachment Definitions (NADs) or Secondary User Defined Networks (UDNs). In simpler terms, a pod can have a VF for its default gateway interface, as well as one or more VFs for secondary networks. To leverage this capability, Multus needs to be installed. Follow these steps on the jumphost while using the Host kubeconfig. + +1. Download the deployment spec for Multus. 
Use at least tag v4.2.3: + + ```bash + user@jumphost: curl -LO https://raw.githubusercontent.com/k8snetworkplumbingwg/multus-cni/master/deployments/multus-daemonset.yml + ``` + +2. Create the Multus cni-conf file that will be used with OVN-Kubernetes: + + ```bash + user@jumphost: cat > cni-conf.json << 'EOF' + { + "name": "multus-cni-network", + "type": "multus", + "logLevel": "verbose", + "logFile": "/var/log/multus.log", + "namespaceIsolation": false, + "multusNamespace": "default", + "clusterNetwork": "ovn-primary", + "confDir": "/etc/cni/net.d", + "readinessindicatorfile": "/etc/cni/net.d/10-ovn-kubernetes.conf", + "kubeconfig": "/etc/cni/net.d/multus.d/multus.kubeconfig" + } + EOF + ``` + +3. Create the configMap using the cni-conf file: + + ```bash + user@jumphost: kubectl -n kube-system delete configmap multus-cni-config --ignore-not-found=true + user@jumphost: kubectl -n kube-system create configmap multus-cni-config --from-file=cni-conf.json + ``` + +4. Edit the `multus-daemonset.yml` previously downloaded. + + ```yaml + ... + spec: + ... + template: + ... + spec: + ... + containers: + - name: kube-multus + image: ghcr.io/k8snetworkplumbingwg/multus-cni:snapshot + command: ["/thin_entrypoint"] + args: + - "/tmp/multus-conf/00-multus.conf" # Modify multus-conf-file + ... + volumes: + ... + - name: multus-cfg + configMap: + name: multus-cni-config + items: + - key: cni-conf.json + path: 00-multus.conf # Modify to 00-multus.conf + ``` + +5. Create the Multus Daemonset. + + ```bash + kubectl apply -f multus-daemonset.yml + ``` + +## Validating the Setup + +Now that we have configured everything it is time to create a pod and verify that it is properly offloaded. The last step to do before we can start a pod is to create the Network Attachment Definition (NAD) so that OVN-Kubernetes will be invoked as the CNI and use VFs for the default network. 
Additionally, Primary or Secondary User Defined Networks (UDNs) could also be created, but for the purpose of this guide we will focus on the Cluster Default Network (CDN). Follow these steps from the jumphost with the Host kubeconfig to create the NAD and a pod to test with. + +1. Configure the primary default NAD. Notice the `resourceName` annotation is set to the SR-IOV device plugin resource we previously configured: + + ```bash + cat < + +Kubernetes Worker NodeOVN-Kube DPUKubeletOVNOVSDPUeth0OVN-Kube DPU-HostPod BPod AHost \ No newline at end of file diff --git a/docs/images/ovnk-unaccelerated.excalidraw b/docs/images/ovnk-unaccelerated.excalidraw new file mode 100644 index 0000000000..d373705c25 --- /dev/null +++ b/docs/images/ovnk-unaccelerated.excalidraw @@ -0,0 +1,2131 @@ +{ + "type": "excalidraw", + "version": 2, + "source": "https://excalidraw.com", + "elements": [ + { + "id": "3PSV2IpmimdHV3TGHwCnL", + "type": "rectangle", + "x": 467.4296875, + "y": 221.05078125, + "width": 1031.4375, + "height": 575.5703125, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aD", + "roundness": { + "type": 3 + }, + "seed": 1323383732, + "version": 437, + "versionNonce": 1307439322, + "isDeleted": false, + "boundElements": [], + "updated": 1769211652514, + "link": null, + "locked": false + }, + { + "id": "Hhj9Yei2KIDBGtqDrnLuE", + "type": "text", + "x": 494.56640624999994, + "y": 233.42578125, + "width": 371.5190825257982, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aE", + "roundness": null, + "seed": 234782220, + "version": 215, + "versionNonce": 956850501, + "isDeleted": false, + 
"boundElements": [], + "updated": 1769807070243, + "link": null, + "locked": false, + "text": "Kubernetes Worker Node", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Kubernetes Worker Node", + "autoResize": false, + "lineHeight": 1.25 + }, + { + "id": "AFBhMx_FMLkpvpoVM55si", + "type": "diamond", + "x": 743.5546875, + "y": 445.03125, + "width": 191.01171874999997, + "height": 175.77734374999997, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aF", + "roundness": { + "type": 2 + }, + "seed": 1109773108, + "version": 903, + "versionNonce": 1838605157, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "d21SDy11vytpGym8ZRv9Z" + } + ], + "updated": 1769807111952, + "link": null, + "locked": false + }, + { + "id": "d21SDy11vytpGym8ZRv9Z", + "type": "text", + "x": 798.0214462280273, + "y": 515.4755859375, + "width": 82.57234191894531, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aG", + "roundness": null, + "seed": 265828788, + "version": 853, + "versionNonce": 1242508997, + "isDeleted": false, + "boundElements": [], + "updated": 1769807111952, + "link": null, + "locked": false, + "text": "Pod A", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "AFBhMx_FMLkpvpoVM55si", + "originalText": "Pod A", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "MMRCgxZ2SkPQRTbazh5Mt", + "type": "diamond", + "x": 775.771484375, + "y": 637.09375, + "width": 167.17578125000003, + "height": 123.34765625000003, + "angle": 0, + 
"strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aH", + "roundness": { + "type": 2 + }, + "seed": 1941094156, + "version": 1015, + "versionNonce": 1658634021, + "isDeleted": true, + "boundElements": [ + { + "type": "text", + "id": "fBzHO81d67o5rHJ7nhi37" + } + ], + "updated": 1769807106331, + "link": null, + "locked": false + }, + { + "id": "fBzHO81d67o5rHJ7nhi37", + "type": "text", + "x": 829.225456237793, + "y": 686.4306640625, + "width": 60.67994689941406, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aI", + "roundness": null, + "seed": 262077836, + "version": 972, + "versionNonce": 1437462955, + "isDeleted": true, + "boundElements": [], + "updated": 1769807106331, + "link": null, + "locked": false, + "text": "Pod B", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "MMRCgxZ2SkPQRTbazh5Mt", + "originalText": "Pod B", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "o-BS3wjgvpBdXfEGxS4zJ", + "type": "rectangle", + "x": 653.71484375, + "y": 517.03515625, + "width": 135.08984375, + "height": 63.8671875, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aJ", + "roundness": { + "type": 3 + }, + "seed": 1828499764, + "version": 508, + "versionNonce": 1836541978, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false + }, + { + "id": "CI86pCLaDcJplMW-JEioJ", + "type": "text", + "x": 675.0098114013672, + 
"y": 523.96875, + "width": 92.49990844726562, + "height": 50, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aJV", + "roundness": null, + "seed": 583086900, + "version": 473, + "versionNonce": 1445346950, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "text": "OVN-Kube\nK2", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "o-BS3wjgvpBdXfEGxS4zJ", + "originalText": "OVN-Kube\nK2", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "uXVXhzqnTRn3VJnZ7vo3K", + "type": "rectangle", + "x": 837.640625, + "y": 549.546875, + "width": 342.1953125, + "height": 172.0546875, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aO", + "roundness": { + "type": 3 + }, + "seed": 1521260428, + "version": 984, + "versionNonce": 893867226, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false + }, + { + "id": "EMcQCTn12gbouIGsMbCWR", + "type": "text", + "x": 855, + "y": 565.53515625, + "width": 39.619964599609375, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aQ", + "roundness": null, + "seed": 322143028, + "version": 709, + "versionNonce": 623410630, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "text": "OVS", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + 
"verticalAlign": "top", + "containerId": null, + "originalText": "OVS", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "FGWbnjuJMPPm2--6Clo68", + "type": "rectangle", + "x": 1209.94921875, + "y": 505.71875, + "width": 104.7421875, + "height": 80.87109375, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aS", + "roundness": { + "type": 3 + }, + "seed": 581768628, + "version": 738, + "versionNonce": 2113644954, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false + }, + { + "id": "MHVGjmp21WFrjCeVuw6_M", + "type": "text", + "x": 1232.8403396606445, + "y": 533.654296875, + "width": 58.95994567871094, + "height": 25, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aT", + "roundness": null, + "seed": 962236812, + "version": 713, + "versionNonce": 807458054, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "text": "L7 FW", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "FGWbnjuJMPPm2--6Clo68", + "originalText": "L7 FW", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "par5rlqYTD0bht0WGS4oz", + "type": "rectangle", + "x": 1218.1171875, + "y": 636.783203125, + "width": 104.7421875, + "height": 80.87109375, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aU", + "roundness": { + "type": 3 + }, + "seed": 246895884, + "version": 723, + 
"versionNonce": 2126997082, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false + }, + { + "id": "VJCFsIBfciMT2Hg4opZrX", + "type": "text", + "x": 1242.4383087158203, + "y": 664.71875, + "width": 56.099945068359375, + "height": 25, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aV", + "roundness": null, + "seed": 1235276684, + "version": 724, + "versionNonce": 1082934342, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "text": "L7 LB", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "par5rlqYTD0bht0WGS4oz", + "originalText": "L7 LB", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "ZHSHwJ-WMxVntYGdHzqAS", + "type": "rectangle", + "x": 875.52734375, + "y": 612.0859375, + "width": 131.03125, + "height": 42.83984375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aW", + "roundness": { + "type": 3 + }, + "seed": 436416012, + "version": 786, + "versionNonce": 276663066, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false + }, + { + "id": "yhhtdkwmarFaKUI6H9eVm", + "type": "text", + "x": 893.5230178833008, + "y": 621.005859375, + "width": 95.03990173339844, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aWV", + "roundness": null, + "seed": 1538533684, + "version": 713, + 
"versionNonce": 1749535622, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "text": "worker LS", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "ZHSHwJ-WMxVntYGdHzqAS", + "originalText": "worker LS", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "bIAAQozA4blO6kgvckP3S", + "type": "rectangle", + "x": 1028.86328125, + "y": 611.146484375, + "width": 131.03125, + "height": 42.83984375, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "ab", + "roundness": { + "type": 3 + }, + "seed": 330947340, + "version": 845, + "versionNonce": 2039551962, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false + }, + { + "id": "O88cyEH5yk5c4puqvqvCF", + "type": "text", + "x": 1059.6089401245117, + "y": 620.06640625, + "width": 69.53993225097656, + "height": 25, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "ac", + "roundness": null, + "seed": 1482856844, + "version": 776, + "versionNonce": 1972398790, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "text": "SFC LS", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "bIAAQozA4blO6kgvckP3S", + "originalText": "SFC LS", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "pVasypJX2u2AP43S1rJ-H", + "type": "rectangle", + "x": 480.009765625, + "y": 450.455078125, + "width": 891.2070312499999, + "height": 289.640625, + "angle": 0, + "strokeColor": "#1e1e1e", + 
"backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "ah", + "roundness": { + "type": 3 + }, + "seed": 1392129420, + "version": 528, + "versionNonce": 62172442, + "isDeleted": true, + "boundElements": [], + "updated": 1769211638753, + "link": null, + "locked": false + }, + { + "id": "oA5KoMQjshRzJUNQcjaiC", + "type": "text", + "x": 500.03125, + "y": 461.015625, + "width": 333.6326293945313, + "height": 25, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aj", + "roundness": null, + "seed": 1841999628, + "version": 121, + "versionNonce": 974458010, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "text": "DPU - K2 Kubernetes Cluster", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "DPU - K2 Kubernetes Cluster", + "autoResize": false, + "lineHeight": 1.25 + }, + { + "id": "DXSgUtwjW1M5OtOfM2oOj", + "type": "text", + "x": 897.46875, + "y": 400.98828125, + "width": 35.91996765136719, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aq", + "roundness": null, + "seed": 1235853748, + "version": 93, + "versionNonce": 865510426, + "isDeleted": true, + "boundElements": [], + "updated": 1769211693537, + "link": null, + "locked": false, + "text": "VFs", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "VFs", + "autoResize": true, + "lineHeight": 1.25 + 
}, + { + "id": "jTuqSQnd1xqE5jzyNgo_V", + "type": "text", + "x": 922.2275161743164, + "y": 553.3203125, + "width": 80.11993408203125, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "ar", + "roundness": null, + "seed": 257701772, + "version": 685, + "versionNonce": 1728258566, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "text": "VF Reps", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "VF Reps", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "kUpychMCsIeTRZMalnMDE", + "type": "rectangle", + "x": 504.18359375, + "y": 517.0546875, + "width": 140.1484375, + "height": 61.43359375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "as", + "roundness": { + "type": 3 + }, + "seed": 250474741, + "version": 420, + "versionNonce": 1471965530, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false + }, + { + "id": "KypHgViW6BpvPS6NP1wXN", + "type": "text", + "x": 528.0078582763672, + "y": 522.771484375, + "width": 92.49990844726562, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "at", + "roundness": null, + "seed": 1179519573, + "version": 388, + "versionNonce": 1604120902, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "text": 
"OVN-Kube\nK1", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "kUpychMCsIeTRZMalnMDE", + "originalText": "OVN-Kube\nK1", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "R8DOjTmolTEKd9pKdGNyl", + "type": "rectangle", + "x": 563.0703125, + "y": 608.494140625, + "width": 176.24218750000003, + "height": 104.79296875000006, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "au", + "roundness": { + "type": 3 + }, + "seed": 2033483131, + "version": 692, + "versionNonce": 702164506, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false + }, + { + "id": "1A3hQ4xYut0uu_3QxzAE5", + "type": "text", + "x": 594.2914505004883, + "y": 635.890625, + "width": 113.79991149902344, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "av", + "roundness": null, + "seed": 1918703131, + "version": 671, + "versionNonce": 448452742, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "text": "Shared OVN\nK1 and K2", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "R8DOjTmolTEKd9pKdGNyl", + "originalText": "Shared OVN\nK1 and K2", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "3kqb-K8O9f08ArUDv9Cjf", + "type": "line", + "x": 821.24609375, + "y": 401.33984375, + "width": 113.5859375, + "height": 209.3515625, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 
1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aw", + "roundness": { + "type": 2 + }, + "seed": 2063502261, + "version": 139, + "versionNonce": 1596119770, + "isDeleted": true, + "boundElements": [], + "updated": 1769211662686, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 113.5859375, + 209.3515625 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "4QSR0lJotZNZzQEMYnuKY", + "type": "line", + "x": 1010.19140625, + "y": 400.0546875, + "width": 21.296875, + "height": 211.91796875, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "ax", + "roundness": { + "type": 2 + }, + "seed": 1579370107, + "version": 196, + "versionNonce": 1847422726, + "isDeleted": true, + "boundElements": [], + "updated": 1769211665400, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -21.296875, + 211.91796875 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "5_Xt3FUxVeazyDS7r-Ucr", + "type": "arrow", + "x": 595.2326807094685, + "y": 579.48828125, + "width": 14.427475540531532, + "height": 25.95703125, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b02", + "roundness": { + "type": 2 + }, + "seed": 1308316949, + "version": 81, + "versionNonce": 627137242, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 14.427475540531532, + 
25.95703125 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "kUpychMCsIeTRZMalnMDE", + "mode": "orbit", + "fixedPoint": [ + 0.5315652620380965, + 0.531565262038097 + ] + }, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "QqSmIOXcdN0n9q_f3P72z", + "type": "arrow", + "x": 744.84765625, + "y": 648.58203125, + "width": 124.9464277396745, + "height": 20.664800962877393, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b03", + "roundness": { + "type": 2 + }, + "seed": 2041478555, + "version": 384, + "versionNonce": 1612306374, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 124.9464277396745, + -20.664800962877393 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "R8DOjTmolTEKd9pKdGNyl", + "mode": "orbit", + "fixedPoint": [ + 0.5237502641723047, + 0.5237502641723036 + ] + }, + "endBinding": { + "elementId": "ZHSHwJ-WMxVntYGdHzqAS", + "mode": "orbit", + "fixedPoint": [ + 0.23070594032628622, + 0.23070594032628763 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "Xo9luBmfagxGHywoTzuWJ", + "type": "line", + "x": 1090.875, + "y": 612.74609375, + "width": 118.109375, + "height": 61.48046875, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b06", + "roundness": { + "type": 2 + }, + "seed": 1795601781, + "version": 77, + "versionNonce": 686092186, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + 
"points": [ + [ + 0, + 0 + ], + [ + 118.109375, + -61.48046875 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "FrZUA_nXF7jrENeAm_iuN", + "type": "line", + "x": 1122.578125, + "y": 611.7421875, + "width": 87.8984375, + "height": 48.03125, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b07", + "roundness": { + "type": 2 + }, + "seed": 140615867, + "version": 65, + "versionNonce": 2010574598, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 87.8984375, + -48.03125 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "h0gFSUv-MCabHAwjdzstW", + "type": "line", + "x": 1140.62890625, + "y": 655.7109375, + "width": 76.375, + "height": 37.7421875, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b08", + "roundness": { + "type": 2 + }, + "seed": 735261141, + "version": 47, + "versionNonce": 1217915994, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 76.375, + 37.7421875 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "uyp2yTlDgYoUCgRf1t2Mw", + "type": "line", + "x": 1160.61328125, + "y": 646.52734375, + "width": 57.1875, + "height": 25.95703125, + "angle": 0, + 
"strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b09", + "roundness": { + "type": 2 + }, + "seed": 209223931, + "version": 31, + "versionNonce": 510598726, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 57.1875, + 25.95703125 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "8RZ9vzUTGLaqkOXekPLDQ", + "type": "arrow", + "x": 747.41015625, + "y": 697.359375, + "width": 286.4296875, + "height": 37.015625, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0A", + "roundness": { + "type": 2 + }, + "seed": 1520074645, + "version": 64, + "versionNonce": 441274650, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 286.4296875, + -37.015625 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "R8DOjTmolTEKd9pKdGNyl", + "mode": "orbit", + "fixedPoint": [ + 0.8833472697193855, + 0.8833472697193837 + ] + }, + "endBinding": { + "elementId": "bIAAQozA4blO6kgvckP3S", + "mode": "orbit", + "fixedPoint": [ + 0.8338256639609369, + 0.8338256639609338 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "5hOE-WQdQ6EGLVGOoP1I2", + "type": "arrow", + "x": 717.5696974855308, + "y": 581.90234375, + "width": 21.679994111348492, + "height": 25.3359375, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + 
"strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0B", + "roundness": { + "type": 2 + }, + "seed": 1498907035, + "version": 49, + "versionNonce": 2025809286, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -21.679994111348492, + 25.3359375 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "o-BS3wjgvpBdXfEGxS4zJ", + "mode": "orbit", + "fixedPoint": [ + 0.6290772163270132, + 0.6290772163270139 + ] + }, + "endBinding": { + "elementId": "R8DOjTmolTEKd9pKdGNyl", + "mode": "orbit", + "fixedPoint": [ + 0.4954418212421667, + 0.49544182124216624 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "tYzSvRmwfQQNkfrVTe1P5", + "type": "rectangle", + "x": 992.984375, + "y": 572.734375, + "width": 234.94921875, + "height": 144.21484375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0C", + "roundness": { + "type": 3 + }, + "seed": 198185882, + "version": 375, + "versionNonce": 411814042, + "isDeleted": false, + "boundElements": [ + { + "id": "u9ZjgE4NtlRvghUr5XIG0", + "type": "arrow" + } + ], + "updated": 1769211901272, + "link": null, + "locked": false + }, + { + "id": "lYB2Gm5YLXtUdQ0eGqYgo", + "type": "freedraw", + "x": 1208.921875, + "y": 367.125, + "width": 0.0001, + "height": 0.0001, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0D", + "roundness": null, + "seed": 901415366, + "version": 4, + "versionNonce": 110577818, + "isDeleted": true, + "boundElements": [], + "updated": 
1769211701877, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0.0001, + 0.0001 + ] + ], + "pressures": [], + "simulatePressure": true + }, + { + "id": "tsRvMJO_5sEYyzqg0Hh7c", + "type": "line", + "x": 929.01171875, + "y": 535.390625, + "width": 65.09375, + "height": 64.51171875, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0E", + "roundness": { + "type": 2 + }, + "seed": 1228634182, + "version": 113, + "versionNonce": 628743877, + "isDeleted": false, + "boundElements": [], + "updated": 1769807129093, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 65.09375, + 64.51171875 + ] + ], + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "oACgbs4cMqbhu8HtPebFx", + "type": "line", + "x": 932.6796875, + "y": 709.16015625, + "width": 61.11328125, + "height": 36.8828125, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0F", + "roundness": { + "type": 2 + }, + "seed": 983491994, + "version": 68, + "versionNonce": 1634049963, + "isDeleted": false, + "boundElements": [], + "updated": 1769807122256, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 61.11328125, + -36.8828125 + ] + ], + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "3d5gOfYF1IUhTLFt9bgVo", + "type": "rectangle", + "x": 981.53125, + "y": 261.81640625, + "width": 263.3984375, + "height": 76.671875, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 
2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0G", + "roundness": { + "type": 3 + }, + "seed": 160618182, + "version": 76, + "versionNonce": 374670022, + "isDeleted": false, + "boundElements": [ + { + "id": "SO75S5gL5bCji88YbBsKf", + "type": "arrow" + } + ], + "updated": 1769211893845, + "link": null, + "locked": false + }, + { + "id": "otyemJxFJECPTvDVhd1hb", + "type": "text", + "x": 1047.35546875, + "y": 283.36328125, + "width": 129.50054931640625, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0H", + "roundness": null, + "seed": 400406918, + "version": 91, + "versionNonce": 1488222219, + "isDeleted": false, + "boundElements": [], + "updated": 1769807077202, + "link": null, + "locked": false, + "text": "OVN-Kube", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "OVN-Kube", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "1chdpPBLx0VEf6sBwnyrT", + "type": "rectangle", + "x": 542.08203125, + "y": 300.78515625, + "width": 179.29296875, + "height": 75.30859375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0I", + "roundness": { + "type": 3 + }, + "seed": 1050229210, + "version": 33, + "versionNonce": 549854406, + "isDeleted": false, + "boundElements": [], + "updated": 1769211856008, + "link": null, + "locked": false + }, + { + "id": "E1qyG2mrQnzUtFZRnlzon", + "type": "text", + "x": 577.8984375, + "y": 320.80078125, + "width": 99.06443786621094, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": 
"transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0J", + "roundness": null, + "seed": 1111918810, + "version": 12, + "versionNonce": 1083119301, + "isDeleted": false, + "boundElements": [], + "updated": 1769807072474, + "link": null, + "locked": false, + "text": "Kubelet", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Kubelet", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "YPI4K5hgJbgHj0hYUQy7U", + "type": "rectangle", + "x": 983.12890625, + "y": 410.5234375, + "width": 260.94921875, + "height": 99.75390625, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0K", + "roundness": { + "type": 3 + }, + "seed": 2135829062, + "version": 161, + "versionNonce": 1398047066, + "isDeleted": false, + "boundElements": [ + { + "id": "SO75S5gL5bCji88YbBsKf", + "type": "arrow" + }, + { + "id": "u9ZjgE4NtlRvghUr5XIG0", + "type": "arrow" + } + ], + "updated": 1769211899581, + "link": null, + "locked": false + }, + { + "id": "xdskfV2GHpQr09QrrldPB", + "type": "text", + "x": 1081.9375, + "y": 441.5078125, + "width": 55.74822998046875, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0L", + "roundness": null, + "seed": 1687121690, + "version": 128, + "versionNonce": 1673910021, + "isDeleted": false, + "boundElements": [], + "updated": 1769807080854, + "link": null, + "locked": false, + "text": "OVN", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + 
"containerId": null, + "originalText": "OVN", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "SO75S5gL5bCji88YbBsKf", + "type": "arrow", + "x": 1108.43359375, + "y": 337.89453125, + "width": 2.078125, + "height": 73.390625, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0M", + "roundness": { + "type": 2 + }, + "seed": 1411280134, + "version": 63, + "versionNonce": 800499334, + "isDeleted": false, + "boundElements": [], + "updated": 1769211896239, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 2.078125, + 73.390625 + ] + ], + "startBinding": { + "elementId": "3d5gOfYF1IUhTLFt9bgVo", + "mode": "inside", + "fixedPoint": [ + 0.48178852142963075, + 0.9922559608722233 + ] + }, + "endBinding": { + "elementId": "YPI4K5hgJbgHj0hYUQy7U", + "mode": "inside", + "fixedPoint": [ + 0.48815172971273746, + 0.007635979167482476 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "u9ZjgE4NtlRvghUr5XIG0", + "type": "arrow", + "x": 1112.98828125, + "y": 508.2265625, + "width": 0.453125, + "height": 66.83984375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0N", + "roundness": { + "type": 2 + }, + "seed": 1453237018, + "version": 64, + "versionNonce": 374911322, + "isDeleted": false, + "boundElements": [], + "updated": 1769211905520, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -0.453125, + 66.83984375 + ] + ], + "startBinding": { + "elementId": "YPI4K5hgJbgHj0hYUQy7U", + "mode": "inside", + "fixedPoint": [ + 0.4976423214526294, + 0.9794415945490856 + ] + }, + "endBinding": { + "elementId": 
"tYzSvRmwfQQNkfrVTe1P5", + "mode": "inside", + "fixedPoint": [ + 0.5088366834588591, + 0.016170535496627753 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "nOHqcMRCCN5CvsIk9L7HK", + "type": "text", + "x": 1083.96875, + "y": 624.0546875, + "width": 55.468231201171875, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0O", + "roundness": null, + "seed": 260809434, + "version": 68, + "versionNonce": 1104957285, + "isDeleted": false, + "boundElements": [], + "updated": 1769807084822, + "link": null, + "locked": false, + "text": "OVS", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "OVS", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "4IG9MXRe5ijYB-oIc24C6", + "type": "rectangle", + "x": 1067.27734375, + "y": 782.578125, + "width": 97.47265625, + "height": 31.69921875, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0P", + "roundness": { + "type": 3 + }, + "seed": 1419364058, + "version": 56, + "versionNonce": 1950880902, + "isDeleted": false, + "boundElements": [], + "updated": 1769211952988, + "link": null, + "locked": false + }, + { + "id": "nIejWKUhYhhAgOj88nUB7", + "type": "text", + "x": 1086.921875, + "y": 786.23828125, + "width": 64.98826599121094, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0Q", + "roundness": null, + "seed": 1034482586, + 
"version": 33, + "versionNonce": 1547842859, + "isDeleted": false, + "boundElements": [], + "updated": 1769807134429, + "link": null, + "locked": false, + "text": "eth0", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "eth0", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "xkJunDpSamP7rfEtN7oH1", + "type": "line", + "x": 1115.1484375, + "y": 783.14453125, + "width": 1.234375, + "height": 65.4609375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0R", + "roundness": { + "type": 2 + }, + "seed": 1791101850, + "version": 51, + "versionNonce": 500083802, + "isDeleted": false, + "boundElements": [], + "updated": 1769211973078, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -1.234375, + -65.4609375 + ] + ], + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "lzux7Pz5JYywiZFFYn3c9", + "type": "diamond", + "x": 746.962890625, + "y": 619.037109375, + "width": 191.01171874999997, + "height": 175.77734374999997, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0S", + "roundness": { + "type": 2 + }, + "seed": 1138066597, + "version": 917, + "versionNonce": 364881579, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "ZIHVxeeORZWrsAYttnEg3" + } + ], + "updated": 1769807115636, + "link": null, + "locked": false + }, + { + "id": "ZIHVxeeORZWrsAYttnEg3", + "type": "text", + "x": 800.2396469116211, + "y": 689.4814453125, + "width": 84.95234680175781, + "height": 35, + "angle": 0, + "strokeColor": 
"#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0T", + "roundness": null, + "seed": 979871749, + "version": 870, + "versionNonce": 1945999973, + "isDeleted": false, + "boundElements": [], + "updated": 1769807118450, + "link": null, + "locked": false, + "text": "Pod B", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "lzux7Pz5JYywiZFFYn3c9", + "originalText": "Pod B", + "autoResize": true, + "lineHeight": 1.25 + } + ], + "appState": { + "gridSize": 20, + "gridStep": 5, + "gridModeEnabled": false, + "viewBackgroundColor": "#ffffff", + "lockedMultiSelections": {} + }, + "files": {} +} \ No newline at end of file diff --git a/docs/images/ovnk-unaccelerated.svg b/docs/images/ovnk-unaccelerated.svg new file mode 100644 index 0000000000..a2bb8222cf --- /dev/null +++ b/docs/images/ovnk-unaccelerated.svg @@ -0,0 +1,4 @@ + + +Kubernetes Worker NodePod AOVN-KubeKubeletOVNOVSeth0Pod B \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index 3d09e08387..1358da3b80 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -86,6 +86,7 @@ nav: - Getting Started: - Launching OVN-Kubernetes: installation/launching-ovn-kubernetes-on-kind.md - Launching OVN-Kubernetes Using Helm: installation/launching-ovn-kubernetes-with-helm.md + - Launching OVN-Kubernetes with DPU Acceleration: installation/launching-ovn-kubernetes-with-dpu.md - Configuration Guide: getting-started/configuration.md - CLI Guide: getting-started/cli-guide.md - Deploying Workloads on OVN-Kubernetes cluster: getting-started/example-pod-creation.md From 2edd4307e0a6abd039d3bab16de1a46c87ab6a08 Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Fri, 13 Feb 2026 00:06:46 -0800 Subject: [PATCH 06/59] fix kind load docker-image content digest not found `kind setup` starts to fail with error: ``` ERROR: failed to load 
image: command "docker exec --privileged -i ovn-worker ctr --namespace=k8s.io images import --all-platforms --digests --snapshotter=overlayfs -" failed with error: exit status 1 Command Output: ctr: content digest sha256:9c04829e9...: not found ``` Related kind issue is https://github.com/kubernetes-sigs/kind/issues/3795. This change uses the workaround mentioned in the kind issue. Signed-off-by: Lei Huang --- .github/workflows/performance-test.yml | 14 ++++++++ .github/workflows/test.yml | 46 ++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/.github/workflows/performance-test.yml b/.github/workflows/performance-test.yml index 9369e659a2..7f691f6b3b 100644 --- a/.github/workflows/performance-test.yml +++ b/.github/workflows/performance-test.yml @@ -209,6 +209,20 @@ jobs: run: | sudo ufw disable + - name: Disable containerd image store + # Workaround for https://github.com/kubernetes-sigs/kind/issues/3795 + run: | + sudo mkdir -p /etc/docker + docker --version || true + containerd --version || true + [ -s "/etc/docker/daemon.json" ] && { + cat "/etc/docker/daemon.json" | jq '. + {"features":{"containerd-snapshotter": false}}' | sudo tee /etc/docker/daemon.$$ + } || { + echo '{"features":{"containerd-snapshotter": false}}' | sudo tee /etc/docker/daemon.$$ + } + sudo mv -f /etc/docker/daemon.$$ /etc/docker/daemon.json + sudo systemctl restart docker + - name: Download test-image-pr uses: actions/download-artifact@v4 with: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4955bfffc2..e70c2d9f5d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -355,9 +355,24 @@ jobs: run: | sudo ufw disable + - name: Disable containerd image store + # Workaround for https://github.com/kubernetes-sigs/kind/issues/3795 + run: | + sudo mkdir -p /etc/docker + docker --version || true + containerd --version || true + [ -s "/etc/docker/daemon.json" ] && { + cat "/etc/docker/daemon.json" | jq '. 
+ {"features":{"containerd-snapshotter": false}}' | sudo tee /etc/docker/daemon.$$ + } || { + echo '{"features":{"containerd-snapshotter": false}}' | sudo tee /etc/docker/daemon.$$ + } + sudo mv -f /etc/docker/daemon.$$ /etc/docker/daemon.json + sudo systemctl restart docker + - name: Load docker image run: | docker load --input ${CI_IMAGE_BASE_TAR} && rm -rf ${CI_IMAGE_BASE_TAR} + docker images || true - name: kind setup run: | @@ -634,9 +649,24 @@ jobs: with: name: test-image-pr + - name: Disable containerd image store + # Workaround for https://github.com/kubernetes-sigs/kind/issues/3795 + run: | + sudo mkdir -p /etc/docker + docker --version || true + containerd --version || true + [ -s "/etc/docker/daemon.json" ] && { + cat "/etc/docker/daemon.json" | jq '. + {"features":{"containerd-snapshotter": false}}' | sudo tee /etc/docker/daemon.$$ + } || { + echo '{"features":{"containerd-snapshotter": false}}' | sudo tee /etc/docker/daemon.$$ + } + sudo mv -f /etc/docker/daemon.$$ /etc/docker/daemon.json + sudo systemctl restart docker + - name: Load docker image run: | docker load --input ${CI_IMAGE_PR_TAR} && rm -rf ${CI_IMAGE_PR_TAR} + docker images || true - name: kind setup timeout-minutes: 30 @@ -791,9 +821,25 @@ jobs: with: name: test-image-pr + - name: Disable containerd image store + # Workaround for https://github.com/kubernetes-sigs/kind/issues/3795 + run: | + sudo mkdir -p /etc/docker + docker --version || true + containerd --version || true + [ -s "/etc/docker/daemon.json" ] && { + cat "/etc/docker/daemon.json" | jq '. 
+ {"features":{"containerd-snapshotter": false}}' | sudo tee /etc/docker/daemon.$$ + } || { + echo '{"features":{"containerd-snapshotter": false}}' | sudo tee /etc/docker/daemon.$$ + } + sudo mv -f /etc/docker/daemon.$$ /etc/docker/daemon.json + sudo systemctl restart docker + - name: Load docker image run: | docker load --input ${CI_IMAGE_PR_TAR} && rm -rf ${CI_IMAGE_PR_TAR} + docker images || true + - name: kind IPv4 setup run: | From 31ee5d7b31c8cb13d35be107922b389dd1085c55 Mon Sep 17 00:00:00 2001 From: fangyuchen86 Date: Fri, 13 Feb 2026 21:20:26 +0800 Subject: [PATCH 07/59] Add SAIC Motor to ADOPTERS Signed-off-by: fangyuchen86 --- ADOPTERS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/ADOPTERS.md b/ADOPTERS.md index 0254f998c3..654a19a650 100644 --- a/ADOPTERS.md +++ b/ADOPTERS.md @@ -5,6 +5,7 @@ 1. Red Hat, Inc. (Uses OVN-Kubernetes as their default CNI in OpenShift product) 2. NVIDIA (Uses OVN-Kubernetes in their production environments) 3. Internet Initiative Japan Inc. (Uses OVN-Kubernetes in their on-premise Kubernetes platform) +4. SAIC Motor Corp. 
Ltd (Use OVN-Kubernetes as network solution to build multi-tenant private cloud) ## Projects From 6f78d1d2cd625ced890c3c31bcce20c44e82802a Mon Sep 17 00:00:00 2001 From: Patryk Diak Date: Fri, 13 Feb 2026 12:32:23 +0100 Subject: [PATCH 08/59] Handle Docker 29+ "invalid IP" response for IP addresses Signed-off-by: Patryk Diak --- test/e2e/infraprovider/providers/kind/kind.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/e2e/infraprovider/providers/kind/kind.go b/test/e2e/infraprovider/providers/kind/kind.go index 532a45fe5d..14e850e496 100644 --- a/test/e2e/infraprovider/providers/kind/kind.go +++ b/test/e2e/infraprovider/providers/kind/kind.go @@ -607,6 +607,8 @@ const ( inspectNetworkMACKeyStr = "{{ with index .NetworkSettings.Networks %q }}{{ .MacAddress }}{{ end }}" inspectNetworkContainersKeyStr = "{{ range $key, $value := .Containers }}{{ printf \"%s\\n\" $value.Name}}{{ end }}'" emptyValue = "" + // Docker 29+ returns "invalid IP" for IP fields + emptyIPValue = "invalid IP" ) func isNetworkAttachedToContainer(networkName, containerName string) bool { @@ -715,7 +717,7 @@ func getNetworkInterface(containerName, networkName string) (api.NetworkInterfac } valueStr := strings.Trim(string(value), "\n") valueStr = strings.Trim(valueStr, "'") - if valueStr == emptyValue { + if valueStr == emptyValue || valueStr == emptyIPValue { return "", nil } return valueStr, nil From e95dc868d2a261ea4ce3fff403709cac5cfbf8cc Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Thu, 5 Feb 2026 14:10:20 -0500 Subject: [PATCH 09/59] Make node gateway Dynamic UDN aware The node gateway logic was not taking into account dynamic UDN. Therefore if a UDN was created with a service, but our node was not active, then at start up during syncServices we would fail due to GetActiveNetworkForNamespace failing. After 60 seconds of syncServices failling, it would lead to OVN-Kube node crashing. 
This commit introduces a common helper function to network manager api, ResolveActiveNetworkForNamespaceOnNode, which will allow legacy controllers that are not per-UDN or default controller to find the primary network serving a namespace for their node. The node/gateway is updated to use this function and during sync and allows us to ignore objects for which the network is not on our node with Dynamic UDN. Additionally it does not fail syncServices when a network is not found. During NAD controller start up, all networks will have been processed. If by the time gateway starts up and the network is missing, that means it is a new event which this node has never seen before. Therefore it is safe to skip it during syncServices and allow initial add handling to take care of it later. Signed-off-by: Tim Rozet --- go-controller/pkg/networkmanager/api.go | 46 ++++ go-controller/pkg/node/gateway_init.go | 2 +- go-controller/pkg/node/gateway_shared_intf.go | 133 ++++++++--- .../pkg/node/gateway_shared_intf_test.go | 219 ++++++++++++++++++ go-controller/pkg/node/healthcheck_service.go | 2 + go-controller/pkg/util/util.go | 3 + 6 files changed, 369 insertions(+), 36 deletions(-) diff --git a/go-controller/pkg/networkmanager/api.go b/go-controller/pkg/networkmanager/api.go index 7c47997276..a8940ec87d 100644 --- a/go-controller/pkg/networkmanager/api.go +++ b/go-controller/pkg/networkmanager/api.go @@ -3,6 +3,7 @@ package networkmanager import ( "context" "errors" + "fmt" nadinformers "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/informers/externalversions/k8s.cni.cncf.io/v1" @@ -106,6 +107,51 @@ type Controller interface { Stop() } +// ResolveActiveNetworkForNamespaceOnNode returns the active primary network for the namespace only if +// the network is active on the given node. 
It uses the NAD cache to resolve the namespace's primary +// NAD and network name, checks NodeHasNetwork, and then calls GetActiveNetworkForNamespace to fetch +// the rendered NetInfo. It returns (nil, false, nil) when the network is not active on the node. +func ResolveActiveNetworkForNamespaceOnNode(nm Interface, nodeName, namespace string) (util.NetInfo, bool, error) { + if nm == nil { + return nil, false, fmt.Errorf("network manager is nil") + } + + nadKey, err := nm.GetPrimaryNADForNamespace(namespace) + if err != nil { + return nil, false, err + } + if nadKey == "" { + // Namespace is gone + return nil, false, nil + } + + if nadKey == types.DefaultNetworkName { + netInfo, err := nm.GetActiveNetworkForNamespace(namespace) + if err != nil { + return nil, false, err + } + return netInfo, true, nil + } + + networkName := nm.GetNetworkNameForNADKey(nadKey) + if networkName == "" { + return nil, false, fmt.Errorf("no primary network found for namespace %s", namespace) + } + + if !nm.NodeHasNetwork(nodeName, networkName) { + return nil, false, nil + } + + // At this point the namespace's primary NAD is known and the network is active on this node, + // so GetActiveNetworkForNamespace should not normally return InvalidPrimaryNetworkError. + // Any error here is treated as transient/inconsistent state. + netInfo, err := nm.GetActiveNetworkForNamespace(namespace) + if err != nil { + return nil, false, err + } + return netInfo, true, nil +} + // Default returns a default implementation that assumes the default network is // the only ever existing network. Used when multi-network capabilities are not // enabled or testing. 
diff --git a/go-controller/pkg/node/gateway_init.go b/go-controller/pkg/node/gateway_init.go index f0eb9094d6..75d704a64a 100644 --- a/go-controller/pkg/node/gateway_init.go +++ b/go-controller/pkg/node/gateway_init.go @@ -507,7 +507,7 @@ func (nc *DefaultNodeNetworkController) initGatewayDPUHost() error { return fmt.Errorf("unable to configure UDN nftables: %w", err) } } - gw.nodePortWatcherIptables = newNodePortWatcherIptables(nc.networkManager) + gw.nodePortWatcherIptables = newNodePortWatcherIptables(nc.name, nc.networkManager) gw.loadBalancerHealthChecker = newLoadBalancerHealthChecker(nc.name, nc.watchFactory) portClaimWatcher, err := newPortClaimWatcher(nc.recorder) if err != nil { diff --git a/go-controller/pkg/node/gateway_shared_intf.go b/go-controller/pkg/node/gateway_shared_intf.go index d24589598e..33d922acdb 100644 --- a/go-controller/pkg/node/gateway_shared_intf.go +++ b/go-controller/pkg/node/gateway_shared_intf.go @@ -176,11 +176,13 @@ func configureUDNServicesNFTables() error { // nodePortWatcherIptables manages iptables rules for shared gateway // to ensure that services using NodePorts are accessible. 
type nodePortWatcherIptables struct { + nodeName string networkManager networkmanager.Interface } -func newNodePortWatcherIptables(networkManager networkmanager.Interface) *nodePortWatcherIptables { +func newNodePortWatcherIptables(nodeName string, networkManager networkmanager.Interface) *nodePortWatcherIptables { return &nodePortWatcherIptables{ + nodeName: nodeName, networkManager: networkManager, } } @@ -189,6 +191,7 @@ func newNodePortWatcherIptables(networkManager networkmanager.Interface) *nodePo // to ensure that services using NodePorts are accessible type nodePortWatcher struct { dpuMode bool + nodeName string gatewayIPv4 string gatewayIPv6 string gatewayIPLock sync.Mutex @@ -890,13 +893,17 @@ func (npw *nodePortWatcher) AddService(service *corev1.Service) error { klog.V(5).Infof("Adding service %s in namespace %s", service.Name, service.Namespace) - netInfo, err := npw.networkManager.GetActiveNetworkForNamespace(service.Namespace) + netInfo, activeOnNode, err := networkmanager.ResolveActiveNetworkForNamespaceOnNode( + npw.networkManager, + npw.nodeName, + service.Namespace, + ) if err != nil { - if util.IsInvalidPrimaryNetworkError(err) { - return nil - } return fmt.Errorf("error getting active network for service %s in namespace %s: %w", service.Name, service.Namespace, err) } + if !activeOnNode { + return nil + } name := ktypes.NamespacedName{Namespace: service.Namespace, Name: service.Name} epSlices, err := npw.watchFactory.GetServiceEndpointSlices(service.Namespace, service.Name, netInfo.GetNetworkName()) @@ -975,13 +982,17 @@ func (npw *nodePortWatcher) UpdateService(old, new *corev1.Service) error { if util.ServiceTypeHasClusterIP(new) && util.IsClusterIPSet(new) { klog.V(5).Infof("Adding new service rules for: %v", new) - netInfo, err := npw.networkManager.GetActiveNetworkForNamespace(new.Namespace) + netInfo, activeOnNode, err := networkmanager.ResolveActiveNetworkForNamespaceOnNode( + npw.networkManager, + npw.nodeName, + new.Namespace, + ) if 
err != nil { - if util.IsInvalidPrimaryNetworkError(err) { - return utilerrors.Join(errors...) - } return fmt.Errorf("error getting active network for service %s in namespace %s: %w", new.Name, new.Namespace, err) } + if !activeOnNode { + return utilerrors.Join(errors...) + } if err = addServiceRules(new, netInfo, svcConfig.localEndpoints, svcConfig.hasLocalHostNetworkEp, npw); err != nil { errors = append(errors, err) @@ -1218,15 +1229,24 @@ func (npw *nodePortWatcher) SyncServices(services []interface{}) error { continue } - netInfo, err := npw.networkManager.GetActiveNetworkForNamespace(service.Namespace) - // The InvalidPrimaryNetworkError is returned when the UDN is not found because it has already been deleted. - if util.IsInvalidPrimaryNetworkError(err) { - continue - } + netInfo, activeOnNode, err := networkmanager.ResolveActiveNetworkForNamespaceOnNode( + npw.networkManager, + npw.nodeName, + service.Namespace, + ) if err != nil { + // During startup sync, avoid failing the entire processExisting loop for namespaces that + // require a UDN but have no primary NAD yet (or it has been deleted). Those services will + // be reconciled later via regular add/update events once the NAD exists. 
+ if util.IsUnprocessedActiveNetworkError(err) || util.IsInvalidPrimaryNetworkError(err) { + continue + } errors = append(errors, err) continue } + if !activeOnNode { + continue + } epSlices, err := npw.watchFactory.GetServiceEndpointSlices(service.Namespace, service.Name, netInfo.GetNetworkName()) if err != nil { @@ -1303,10 +1323,17 @@ func (npw *nodePortWatcher) AddEndpointSlice(epSlice *discovery.EndpointSlice) e var errors []error var svc *corev1.Service - netInfo, err := npw.networkManager.GetActiveNetworkForNamespace(epSlice.Namespace) + netInfo, activeOnNode, err := networkmanager.ResolveActiveNetworkForNamespaceOnNode( + npw.networkManager, + npw.nodeName, + epSlice.Namespace, + ) if err != nil { return fmt.Errorf("error getting active network for endpointslice %s in namespace %s: %w", epSlice.Name, epSlice.Namespace, err) } + if !activeOnNode { + return nil + } if util.IsNetworkSegmentationSupportEnabled() && !util.IsEndpointSliceForNetwork(epSlice, netInfo) { return nil @@ -1423,23 +1450,24 @@ func (npw *nodePortWatcher) DeleteEndpointSlice(epSlice *discovery.EndpointSlice // Get network info after deleting old rules, before adding new ones. // This ensures old rules are cleaned up even if namespace/network is deleted, // and allows graceful handling of deletion race conditions. - netInfo, err := npw.networkManager.GetActiveNetworkForNamespace(namespacedName.Namespace) + netInfo, activeOnNode, err := networkmanager.ResolveActiveNetworkForNamespaceOnNode( + npw.networkManager, + npw.nodeName, + namespacedName.Namespace, + ) if err != nil { - // If the namespace was deleted, skip adding new service rules - if apierrors.IsNotFound(err) { - klog.V(5).Infof("Namespace not found for service %s/%s during endpoint slice delete, skipping adding service rules", - namespacedName.Namespace, namespacedName.Name) - return utilerrors.Join(errors...) 
- } - // If the UDN was deleted, skip adding new service rules - if util.IsInvalidPrimaryNetworkError(err) { - klog.V(5).Infof("Skipping addServiceRules for %s/%s during endpoint slice delete: primary network invalid: %v", + // If the UDN was deleted or not processed yet, skip adding new service rules + if util.IsInvalidPrimaryNetworkError(err) || util.IsUnprocessedActiveNetworkError(err) { + klog.V(5).Infof("Skipping addServiceRules for %s/%s during endpoint slice delete: primary network unavailable: %v", namespacedName.Namespace, namespacedName.Name, err) return utilerrors.Join(errors...) } errors = append(errors, fmt.Errorf("error getting active network for service %s/%s: %w", namespacedName.Namespace, namespacedName.Name, err)) return utilerrors.Join(errors...) } + if !activeOnNode { + return utilerrors.Join(errors...) + } if err = addServiceRules(svcConfig.service, netInfo, localEndpoints, hasLocalHostNetworkEp, npw); err != nil { errors = append(errors, err) @@ -1476,10 +1504,17 @@ func (npw *nodePortWatcher) UpdateEndpointSlice(oldEpSlice, newEpSlice *discover var err error var errors []error - netInfo, err := npw.networkManager.GetActiveNetworkForNamespace(newEpSlice.Namespace) + netInfo, activeOnNode, err := networkmanager.ResolveActiveNetworkForNamespaceOnNode( + npw.networkManager, + npw.nodeName, + newEpSlice.Namespace, + ) if err != nil { return fmt.Errorf("error getting active network for endpointslice %s in namespace %s: %w", newEpSlice.Name, newEpSlice.Namespace, err) } + if !activeOnNode { + return nil + } if util.IsNetworkSegmentationSupportEnabled() && !util.IsEndpointSliceForNetwork(newEpSlice, netInfo) { return nil @@ -1564,13 +1599,17 @@ func (npwipt *nodePortWatcherIptables) AddService(service *corev1.Service) error return nil } - netInfo, err := npwipt.networkManager.GetActiveNetworkForNamespace(service.Namespace) + netInfo, activeOnNode, err := networkmanager.ResolveActiveNetworkForNamespaceOnNode( + npwipt.networkManager, + 
npwipt.nodeName, + service.Namespace, + ) if err != nil { - if util.IsInvalidPrimaryNetworkError(err) { - return nil - } return fmt.Errorf("error getting active network for service %s in namespace %s: %w", service.Name, service.Namespace, err) } + if !activeOnNode { + return nil + } if err := addServiceRules(service, netInfo, nil, false, nil); err != nil { return fmt.Errorf("AddService failed for nodePortWatcherIptables: %v", err) @@ -1595,13 +1634,17 @@ func (npwipt *nodePortWatcherIptables) UpdateService(old, new *corev1.Service) e } if util.ServiceTypeHasClusterIP(new) && util.IsClusterIPSet(new) { - netInfo, err := npwipt.networkManager.GetActiveNetworkForNamespace(new.Namespace) + netInfo, activeOnNode, err := networkmanager.ResolveActiveNetworkForNamespaceOnNode( + npwipt.networkManager, + npwipt.nodeName, + new.Namespace, + ) if err != nil { - if util.IsInvalidPrimaryNetworkError(err) { - return utilerrors.Join(errors...) - } return fmt.Errorf("error getting active network for service %s in namespace %s: %w", new.Name, new.Namespace, err) } + if !activeOnNode { + return utilerrors.Join(errors...) + } if err = addServiceRules(new, netInfo, nil, false, nil); err != nil { errors = append(errors, err) @@ -1642,6 +1685,24 @@ func (npwipt *nodePortWatcherIptables) SyncServices(services []interface{}) erro if !util.ServiceTypeHasClusterIP(service) || !util.IsClusterIPSet(service) { continue } + _, activeOnNode, err := networkmanager.ResolveActiveNetworkForNamespaceOnNode( + npwipt.networkManager, + npwipt.nodeName, + service.Namespace, + ) + if err != nil { + // During startup sync, avoid failing the entire processExisting loop for namespaces that + // require a UDN but have no primary NAD yet (or it has been deleted). Those services will + // be reconciled later via regular add/update events once the NAD exists. 
+ if util.IsUnprocessedActiveNetworkError(err) || util.IsInvalidPrimaryNetworkError(err) { + continue + } + errors = append(errors, err) + continue + } + if !activeOnNode { + continue + } // Add correct iptables rules. // TODO: ETP and ITP is not implemented for smart NIC mode. keepIPTRules = append(keepIPTRules, getGatewayIPTRules(service, nil, false)...) @@ -1791,7 +1852,7 @@ func newGateway( if config.Gateway.NodeportEnable { klog.Info("Creating Gateway Node Port Watcher") - gw.nodePortWatcher, err = newNodePortWatcher(gwBridge, gw.openflowManager, gw.nodeIPManager, watchFactory, networkManager) + gw.nodePortWatcher, err = newNodePortWatcher(nodeName, gwBridge, gw.openflowManager, gw.nodeIPManager, watchFactory, networkManager) if err != nil { return err } @@ -1812,6 +1873,7 @@ func newGateway( } func newNodePortWatcher( + nodeName string, gwBridge *bridgeconfig.BridgeConfiguration, ofm *openflowManager, nodeIPManager *addressManager, @@ -1880,6 +1942,7 @@ func newNodePortWatcher( npw := &nodePortWatcher{ dpuMode: dpuMode, + nodeName: nodeName, gatewayIPv4: gatewayIPv4, gatewayIPv6: gatewayIPv6, ofportPhys: ofportPhys, diff --git a/go-controller/pkg/node/gateway_shared_intf_test.go b/go-controller/pkg/node/gateway_shared_intf_test.go index 065b7c52ad..38cc1efe24 100644 --- a/go-controller/pkg/node/gateway_shared_intf_test.go +++ b/go-controller/pkg/node/gateway_shared_intf_test.go @@ -11,6 +11,7 @@ import ( corev1 "k8s.io/api/core/v1" discovery "k8s.io/api/discovery/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/client-go/kubernetes/fake" @@ -21,6 +22,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/networkmanager" + ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" 
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -38,6 +40,11 @@ type mockNetworkManagerWithNamespaceNotFoundError struct { networkmanager.Interface } +func (m *mockNetworkManagerWithNamespaceNotFoundError) GetPrimaryNADForNamespace(_ string) (string, error) { + // Simulate namespace deletion: no primary NAD by definition. + return "", nil +} + func (m *mockNetworkManagerWithNamespaceNotFoundError) GetActiveNetworkForNamespace(namespace string) (util.NetInfo, error) { notFoundErr := apierrors.NewNotFound(schema.GroupResource{Resource: "namespaces"}, namespace) return nil, fmt.Errorf("failed to get namespace %q: %w", namespace, notFoundErr) @@ -48,6 +55,11 @@ type mockNetworkManagerWithInvalidPrimaryNetworkError struct { networkmanager.Interface } +func (m *mockNetworkManagerWithInvalidPrimaryNetworkError) GetPrimaryNADForNamespace(_ string) (string, error) { + // just a trigger to ensure GetActiveNetworkForNamespace gets called + return types.DefaultNetworkName, nil +} + func (m *mockNetworkManagerWithInvalidPrimaryNetworkError) GetActiveNetworkForNamespace(namespace string) (util.NetInfo, error) { return nil, util.NewInvalidPrimaryNetworkError(namespace) } @@ -57,10 +69,68 @@ type mockNetworkManagerWithError struct { networkmanager.Interface } +func (m *mockNetworkManagerWithError) GetPrimaryNADForNamespace(_ string) (string, error) { + // just a trigger to ensure GetActiveNetworkForNamespace gets called + return types.DefaultNetworkName, nil +} + func (m *mockNetworkManagerWithError) GetActiveNetworkForNamespace(namespace string) (util.NetInfo, error) { return nil, fmt.Errorf("network lookup failed for namespace %q", namespace) } +// mockNetworkManagerWithUnprocessedActiveNetworkError simulates a namespace that +// requires a UDN but the primary NAD has not been processed yet. 
+type mockNetworkManagerWithUnprocessedActiveNetworkError struct { + networkmanager.Interface +} + +func (m *mockNetworkManagerWithUnprocessedActiveNetworkError) GetPrimaryNADForNamespace(namespace string) (string, error) { + return "", util.NewUnprocessedActiveNetworkError(namespace, "") +} + +// mockNetworkManagerWithInactiveNode simulates a UDN where the node is inactive for the network. +type mockNetworkManagerWithInactiveNode struct { + networkmanager.Interface +} + +func (m *mockNetworkManagerWithInactiveNode) GetPrimaryNADForNamespace(_ string) (string, error) { + return "test-namespace/test-nad", nil +} + +func (m *mockNetworkManagerWithInactiveNode) GetNetworkNameForNADKey(_ string) string { + return "test-udn" +} + +func (m *mockNetworkManagerWithInactiveNode) NodeHasNetwork(_, _ string) bool { + return false +} + +func (m *mockNetworkManagerWithInactiveNode) GetActiveNetworkForNamespace(namespace string) (util.NetInfo, error) { + return nil, fmt.Errorf("unexpected GetActiveNetworkForNamespace call for %q", namespace) +} + +// mockNetworkManagerWithActiveUDN simulates a UDN active on this node. 
+type mockNetworkManagerWithActiveUDN struct { + networkmanager.Interface + netInfo util.NetInfo +} + +func (m *mockNetworkManagerWithActiveUDN) GetPrimaryNADForNamespace(_ string) (string, error) { + return "test-namespace/test-nad", nil +} + +func (m *mockNetworkManagerWithActiveUDN) GetNetworkNameForNADKey(_ string) string { + return m.netInfo.GetNetworkName() +} + +func (m *mockNetworkManagerWithActiveUDN) NodeHasNetwork(_, _ string) bool { + return true +} + +func (m *mockNetworkManagerWithActiveUDN) GetActiveNetworkForNamespace(_ string) (util.NetInfo, error) { + return m.netInfo, nil +} + // verifyIPTablesRule checks if an iptables rule exists and asserts the expected state func verifyIPTablesRule(ipt util.IPTablesHelper, serviceIP string, servicePort, nodePort int32, shouldExist bool, message string) { exists, err := ipt.Exists("nat", "OVN-KUBE-NODEPORT", @@ -256,3 +326,152 @@ var _ = Describe("DeleteEndpointSlice", func() { }) }) }) + +var _ = Describe("SyncServices", func() { + var ( + fakeClient *util.OVNNodeClientset + watcher *factory.WatchFactory + npw *nodePortWatcher + iptV4 util.IPTablesHelper + iptV6 util.IPTablesHelper + ) + + const ( + nodeName = "test-node" + testNamespace = "test-namespace" + testService = "test-service" + ) + + BeforeEach(func() { + var err error + Expect(config.PrepareTestConfig()).To(Succeed()) + config.Gateway.Mode = config.GatewayModeLocal + config.IPv4Mode = true + config.IPv6Mode = false + + fakeClient = &util.OVNNodeClientset{ + KubeClient: fake.NewSimpleClientset(), + } + fakeClient.AdminPolicyRouteClient = adminpolicybasedrouteclient.NewSimpleClientset() + fakeClient.NetworkAttchDefClient = nadfake.NewSimpleClientset() + fakeClient.UserDefinedNetworkClient = udnfakeclient.NewSimpleClientset() + + watcher, err = factory.NewNodeWatchFactory(fakeClient, nodeName) + Expect(err).NotTo(HaveOccurred()) + err = watcher.Start() + Expect(err).NotTo(HaveOccurred()) + + iptV4, iptV6 = util.SetFakeIPTablesHelpers() + npw = 
initFakeNodePortWatcher(iptV4, iptV6) + npw.watchFactory = watcher + npw.networkManager = networkmanager.Default().Interface() + + k := &kube.Kube{KClient: fakeClient.KubeClient} + npw.nodeIPManager = newAddressManagerInternal(nodeName, k, nil, watcher, nil, false) + }) + + AfterEach(func() { + watcher.Shutdown() + }) + + Context("when namespace requires UDN but NAD is unprocessed", func() { + It("should skip service sync without failing startup", func() { + service := newService(testService, testNamespace, "10.96.0.20", + []corev1.ServicePort{{ + Name: "http", + Protocol: corev1.ProtocolTCP, + Port: 80, + TargetPort: intstr.FromInt(8080), + NodePort: 30091, + }}, + corev1.ServiceTypeNodePort, nil, corev1.ServiceStatus{}, false, false) + + npw.networkManager = &mockNetworkManagerWithUnprocessedActiveNetworkError{} + + err := npw.SyncServices([]interface{}{service}) + Expect(err).NotTo(HaveOccurred()) + + verifyIPTablesRule(iptV4, "10.96.0.20", 80, 30091, false, + "iptables rule should not be created when UDN is unprocessed") + }) + }) + + Context("when UDN is inactive on this node", func() { + It("should skip service sync without installing rules", func() { + service := newService(testService, testNamespace, "10.96.0.30", + []corev1.ServicePort{{ + Name: "http", + Protocol: corev1.ProtocolTCP, + Port: 80, + TargetPort: intstr.FromInt(8080), + NodePort: 30092, + }}, + corev1.ServiceTypeNodePort, nil, corev1.ServiceStatus{}, false, false) + + npw.networkManager = &mockNetworkManagerWithInactiveNode{} + + err := npw.SyncServices([]interface{}{service}) + Expect(err).NotTo(HaveOccurred()) + + verifyIPTablesRule(iptV4, "10.96.0.30", 80, 30092, false, + "iptables rule should not be created when UDN is inactive on this node") + }) + }) + + Context("when UDN is active on this node", func() { + It("should install nodeport rules", func() { + // Avoid openflow dependency in this test. 
+ config.Gateway.AllowNoUplink = true + npw.ofportPhys = "" + + service := newService(testService, testNamespace, "10.96.0.40", + []corev1.ServicePort{{ + Name: "http", + Protocol: corev1.ProtocolTCP, + Port: 80, + TargetPort: intstr.FromInt(8080), + NodePort: 30093, + }}, + corev1.ServiceTypeNodePort, nil, corev1.ServiceStatus{}, false, false) + + nad := ovntest.GenerateNAD("test-udn", "test-nad", testNamespace, types.Layer3Topology, "10.1.0.0/16", types.NetworkRolePrimary) + netInfo, err := util.ParseNADInfo(nad) + Expect(err).NotTo(HaveOccurred()) + npw.networkManager = &mockNetworkManagerWithActiveUDN{netInfo: netInfo} + + nodeName := npw.nodeIPManager.nodeName + epPortName := "http" + epPortValue := int32(8080) + epPortProtocol := corev1.ProtocolTCP + epSlice := &discovery.EndpointSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: testService + "ab23", + Namespace: testNamespace, + Labels: map[string]string{ + types.LabelUserDefinedServiceName: testService, + }, + Annotations: map[string]string{ + types.UserDefinedNetworkEndpointSliceAnnotation: netInfo.GetNetworkName(), + }, + }, + AddressType: discovery.AddressTypeIPv4, + Endpoints: []discovery.Endpoint{{ + Addresses: []string{"10.244.0.9"}, + NodeName: &nodeName, + }}, + Ports: []discovery.EndpointPort{{ + Name: &epPortName, + Protocol: &epPortProtocol, + Port: &epPortValue, + }}, + } + Expect(watcher.EndpointSliceInformer().GetStore().Add(epSlice)).To(Succeed()) + + err = npw.SyncServices([]interface{}{service}) + Expect(err).NotTo(HaveOccurred()) + + verifyIPTablesRule(iptV4, "10.96.0.40", 80, 30093, true, + "iptables rule should be created when UDN is active on this node") + }) + }) +}) diff --git a/go-controller/pkg/node/healthcheck_service.go b/go-controller/pkg/node/healthcheck_service.go index dc906f4f1d..30ce4e793b 100644 --- a/go-controller/pkg/node/healthcheck_service.go +++ b/go-controller/pkg/node/healthcheck_service.go @@ -47,6 +47,8 @@ func (l *loadBalancerHealthChecker) AddService(svc 
*corev1.Service) error { if err := l.server.SyncServices(l.services); err != nil { return fmt.Errorf("unable to sync service %v; err: %v", name, err) } + // we can use CDN here and do not care about UDN because we are just looking for a count + // which will be the same between CDN and UDN epSlices, err := l.watchFactory.GetServiceEndpointSlices(svc.Namespace, svc.Name, types.DefaultNetworkName) if err != nil { return fmt.Errorf("could not fetch endpointslices "+ diff --git a/go-controller/pkg/util/util.go b/go-controller/pkg/util/util.go index 76a8833f2a..6299177a70 100644 --- a/go-controller/pkg/util/util.go +++ b/go-controller/pkg/util/util.go @@ -366,6 +366,8 @@ func IsClusterIP(svcVIP string) bool { return false } +// UnprocessedActiveNetworkError indicates that the primary UDN is required and exists +// for a namespace, but our network manager has not processed it yet. type UnprocessedActiveNetworkError struct { namespace string udnName string @@ -385,6 +387,7 @@ func NewUnprocessedActiveNetworkError(namespace, udnName string) *UnprocessedAct return &UnprocessedActiveNetworkError{namespace: namespace, udnName: udnName} } +// InvalidPrimaryNetworkError indicates that the namespace requires a primary UDN, but no primary UDN exists yet type InvalidPrimaryNetworkError struct { namespace string } From e624ec9df3f4c6f9ea49cf7a884df03ffeb28b7d Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Thu, 5 Feb 2026 14:46:47 -0500 Subject: [PATCH 10/59] Fix GetActiveNetworkForNamespace usage for NetPol Network Policy add was not taking into account dynamic UDN. This was not a problem for the layer2/layer3 UDN controller side, because if the node was inactive, then the controllers wouldn't exist. However, it was a problem for the default network controller, because if the DNC could not get the active network, it would error and retry to add the KNP over and over again for other UDNs. 
This fixes it by checking the nad controller cache instead, which will always have the full info to determine if the KNP belongs to CDN. Furthermore, the delete KNP path was incorrect. It would try to get the active network which could be gone during deletion. This was unnecessary as the deleteNetworkPolicy code will check to see if it actually configured it in the first place, making it a noop to always call delete. Signed-off-by: Tim Rozet --- .../pkg/ovn/base_network_controller.go | 34 +++++++++++++------ .../pkg/ovn/base_network_controller_policy.go | 1 - 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/go-controller/pkg/ovn/base_network_controller.go b/go-controller/pkg/ovn/base_network_controller.go index f67f3b3972..7656e41ebc 100644 --- a/go-controller/pkg/ovn/base_network_controller.go +++ b/go-controller/pkg/ovn/base_network_controller.go @@ -13,7 +13,6 @@ import ( corev1 "k8s.io/api/core/v1" knet "k8s.io/api/networking/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/sets" clientset "k8s.io/client-go/kubernetes" @@ -1168,6 +1167,30 @@ func (bnc *BaseNetworkController) AddResourceCommon(objType reflect.Type, obj in if !ok { return fmt.Errorf("could not cast %T object to *knet.NetworkPolicy", obj) } + foundNamespaceNAD, err := bnc.networkManager.GetPrimaryNADForNamespace(np.Namespace) + if err != nil { + // If this is a UDN namespace that hasn't been processed yet, the default + // controller should skip it while UDN controllers should retry. + if bnc.GetNetworkName() == types.DefaultNetworkName && util.IsUnprocessedActiveNetworkError(err) { + return nil + } + // Retry until the NAD controller has processed the primary NAD for this namespace. 
+ return fmt.Errorf("could not get primary network NAD for namespace %s: %v", np.Namespace, err) + } + if foundNamespaceNAD == types.DefaultNetworkName { + // Only the default network controller should handle policies in default namespaces. + if bnc.GetNetworkName() != types.DefaultNetworkName { + return nil + } + } else { + networkName := bnc.networkManager.GetNetworkNameForNADKey(foundNamespaceNAD) + if networkName == "" { + return fmt.Errorf("no primary network found for namespace %s", np.Namespace) + } + if bnc.GetNetworkName() != networkName { + return nil + } + } netinfo, err := bnc.networkManager.GetActiveNetworkForNamespace(np.Namespace) if err != nil { return fmt.Errorf("could not get active network for namespace %s: %v", np.Namespace, err) @@ -1193,15 +1216,6 @@ func (bnc *BaseNetworkController) DeleteResourceCommon(objType reflect.Type, obj if !ok { return fmt.Errorf("could not cast obj of type %T to *knet.NetworkPolicy", obj) } - netinfo, err := bnc.networkManager.GetActiveNetworkForNamespace(knp.Namespace) - // The InvalidPrimaryNetworkError is returned when the UDN is not found because it has already been deleted, - // while the NotFound error occurs when the namespace no longer exists. In both cases, proceed with deleting the NetworkPolicy. 
- if err != nil && !util.IsInvalidPrimaryNetworkError(err) && !apierrors.IsNotFound(err) { - return fmt.Errorf("could not get active network for namespace %s: %w", knp.Namespace, err) - } - if err == nil && bnc.GetNetworkName() != netinfo.GetNetworkName() { - return nil - } return bnc.deleteNetworkPolicy(knp) default: klog.Errorf("Can not process delete resource event, object type %s is not supported", objType) diff --git a/go-controller/pkg/ovn/base_network_controller_policy.go b/go-controller/pkg/ovn/base_network_controller_policy.go index 79a46449ae..223f13f6a2 100644 --- a/go-controller/pkg/ovn/base_network_controller_policy.go +++ b/go-controller/pkg/ovn/base_network_controller_policy.go @@ -1307,7 +1307,6 @@ func (bnc *BaseNetworkController) deleteNetworkPolicy(policy *knet.NetworkPolicy err := bnc.networkPolicies.DoWithLock(npKey, func(npKey string) error { np, ok := bnc.networkPolicies.Load(npKey) if !ok { - klog.Infof("Deleting policy %s that is already deleted", npKey) return nil } if err := bnc.cleanupNetworkPolicy(np); err != nil { From b79e6f949fd08416f682cd630cc7e088f0de9f1e Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Thu, 5 Feb 2026 15:12:11 -0500 Subject: [PATCH 11/59] Fix Dynamic UDN with services controller Needed to be updated for the same reasons as network policy. Services controller is per UDN, and with an inactive node this is not a problem for UDN controllers as they will not exist. However, for DNC it would continue failing to get active network here. Use the nad controller cache and shortcut the checks for default network controller. 
Signed-off-by: Tim Rozet --- .../services/services_controller.go | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/go-controller/pkg/ovn/controller/services/services_controller.go b/go-controller/pkg/ovn/controller/services/services_controller.go index 83ccedca49..181e55eeae 100644 --- a/go-controller/pkg/ovn/controller/services/services_controller.go +++ b/go-controller/pkg/ovn/controller/services/services_controller.go @@ -600,22 +600,36 @@ func (c *Controller) RequestFullSync(nodeInfos []nodeInfo) { // belong to the network that this service controller is responsible for. func (c *Controller) skipService(name, namespace string) bool { if util.IsNetworkSegmentationSupportEnabled() { - serviceNetwork, err := c.networkManager.GetActiveNetworkForNamespace(namespace) + serviceNAD, err := c.networkManager.GetPrimaryNADForNamespace(namespace) if err != nil { + // If the namespace requires a UDN that hasn't been processed yet, the default controller + // should skip this service; the UDN controller will handle it once ready. 
+ if util.IsUnprocessedActiveNetworkError(err) { + return c.netInfo.IsDefault() + } utilruntime.HandleError(fmt.Errorf("failed to retrieve network for service %s/%s: %w", namespace, name, err)) return true } + serviceNetworkName := types.DefaultNetworkName + isDefaultNetwork := serviceNAD == types.DefaultNetworkName + if !isDefaultNetwork { + serviceNetworkName = c.networkManager.GetNetworkNameForNADKey(serviceNAD) + if serviceNetworkName == "" { + return true + } + } + // Do not skip default network services enabled for UDN - if serviceNetwork.IsDefault() && + if isDefaultNetwork && c.netInfo.IsPrimaryNetwork() && globalconfig.Gateway.Mode == globalconfig.GatewayModeShared && util.IsUDNEnabledService(ktypes.NamespacedName{Namespace: namespace, Name: name}.String()) { return false } - if serviceNetwork.GetNetworkName() != c.netInfo.GetNetworkName() { + if serviceNetworkName != c.netInfo.GetNetworkName() { return true } } From 3ae25d2567b7257f3c37ff225e957ead736eaab5 Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Thu, 5 Feb 2026 18:44:34 -0500 Subject: [PATCH 12/59] Guard GetPrimaryNADForNamespace when netseg not enabled Should always just return default network in that case. Signed-off-by: Tim Rozet --- go-controller/pkg/networkmanager/nad_controller.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/go-controller/pkg/networkmanager/nad_controller.go b/go-controller/pkg/networkmanager/nad_controller.go index b282535f93..63335beea0 100644 --- a/go-controller/pkg/networkmanager/nad_controller.go +++ b/go-controller/pkg/networkmanager/nad_controller.go @@ -907,8 +907,11 @@ func (c *nadController) GetActiveNetworkForNamespaceFast(namespace string) util. // GetPrimaryNADForNamespace returns the full namespaced key of the // primary NAD for the given namespace, if one exists. 
-// Returns default network if namespace has no primary UDN +// Returns default network if namespace has no primary UDN or Network Segmentation is disabled func (c *nadController) GetPrimaryNADForNamespace(namespace string) (string, error) { + if !util.IsNetworkSegmentationSupportEnabled() { + return types.DefaultNetworkName, nil + } c.RLock() primary := c.primaryNADs[namespace] c.RUnlock() From f1a65d47e04c842bfb52d358df6c1fe0cde79ff5 Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Thu, 5 Feb 2026 19:07:54 -0500 Subject: [PATCH 13/59] Fix EgressFirewall GetActiveNetworkForNamespace EF calls GetActiveNetworkForNamespace in an initialSync migration function. This function moves from cluster port group to namespace pgs. It is old and could be argued to just remove the code, but for now move to use nad controller cache. Also, do not cause OVNK to exit if we cannot get the network name, and just skip that entity. Signed-off-by: Tim Rozet --- .../controller/egressfirewall/egressfirewall.go | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall.go b/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall.go index d537804f56..e3a4009642 100644 --- a/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall.go +++ b/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall.go @@ -948,8 +948,8 @@ func (oc *EFController) moveACLsToNamespacedPortGroups(existingEFNamespaces map[ if namespace != "" && existingEFNamespaces[namespace] { pgName, err := oc.getNamespacePortGroupName(namespace) if err != nil { - return fmt.Errorf("failed to get port group name for egress firewall ACL move with "+ - "namespace: %s, err: %w", namespace, err) + klog.Warningf("Skipping egress firewall ACL move for namespace %s: %v", namespace, err) + continue } // re-attach from ClusterPortGroupNameBase to namespaced port group. // port group should exist, because namespace handler will create it. 
@@ -1088,11 +1088,18 @@ func getNamespacePortGroupDbIDs(ns string, controller string) *libovsdbops.DbObj } func (oc *EFController) getNamespacePortGroupName(namespace string) (string, error) { - activeNetwork, err := oc.networkManager.GetActiveNetworkForNamespace(namespace) + nadKey, err := oc.networkManager.GetPrimaryNADForNamespace(namespace) if err != nil { - return "", fmt.Errorf("failed to get active network for namespace %s: %w", namespace, err) + return "", fmt.Errorf("failed to get primary NAD for namespace %s: %w", namespace, err) } - ownerController := activeNetwork.GetNetworkName() + "-network-controller" + networkName := types.DefaultNetworkName + if nadKey != types.DefaultNetworkName && nadKey != "" { + networkName = oc.networkManager.GetNetworkNameForNADKey(nadKey) + if networkName == "" { + return "", fmt.Errorf("failed to resolve network name for NAD %s in namespace %s", nadKey, namespace) + } + } + ownerController := networkName + "-network-controller" return libovsdbutil.GetPortGroupName(getNamespacePortGroupDbIDs(namespace, ownerController)), nil } From 9ab81e578e3803a97a263d0a1adb1ff6201eb461 Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Fri, 6 Feb 2026 09:23:56 -0500 Subject: [PATCH 14/59] Bring EgressIP inline with NAD Reconciliation Egress IP controller runs as part of DNC, is event driven, and retries on failures. It is also not dynamic UDN aware. This commit aims to fix this by: - Change EgressIP to check with nad controller for network presence - If network is not processed/invalid skip retrying in egress IP controller - Register NAD Reconciler for Egress IP, so that when network becomes active Egress IP handles reconciliation. - If dynamic UDN is enabled, filter out EgressIP operations for inactive nodes. Overall this should be a quality of life improvement to EgressIP and reduce unnecessary reconciliation with UDN. Future steps will be to break Egress IP into its own level driven controller.
Signed-off-by: Tim Rozet --- .../pkg/ovn/default_network_controller.go | 9 + go-controller/pkg/ovn/egressip.go | 207 ++++++++++++++++-- 2 files changed, 197 insertions(+), 19 deletions(-) diff --git a/go-controller/pkg/ovn/default_network_controller.go b/go-controller/pkg/ovn/default_network_controller.go index 5e850fef14..61023878ad 100644 --- a/go-controller/pkg/ovn/default_network_controller.go +++ b/go-controller/pkg/ovn/default_network_controller.go @@ -248,6 +248,9 @@ func newDefaultNetworkControllerCommon( oc.ovnClusterLRPToJoinIfAddrs = gwLRPIfAddrs oc.initRetryFramework() + if oc.eIPC != nil { + oc.eIPC.retryEgressIPPods = oc.retryEgressIPPods + } return oc, nil } @@ -343,6 +346,9 @@ func (oc *DefaultNetworkController) Stop() { if oc.efController != nil { oc.efController.Stop() } + if oc.eIPC != nil { + oc.eIPC.StopNADReconciler() + } if oc.routeImportManager != nil { oc.routeImportManager.ForgetNetwork(oc.GetNetworkName()) } @@ -459,6 +465,9 @@ func (oc *DefaultNetworkController) run(_ context.Context) error { } if config.OVNKubernetesFeature.EnableEgressIP { + if err := oc.eIPC.StartNADReconciler(); err != nil { + return err + } // This is probably the best starting order for all egress IP handlers. 
// WatchEgressIPPods and WatchEgressIPNamespaces only use the informer // cache to retrieve the egress IPs when determining if namespace/pods diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index 41f2e9a6af..2fa91d3114 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -22,8 +22,10 @@ import ( "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" listers "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/record" "k8s.io/client-go/util/retry" + "k8s.io/client-go/util/workqueue" "k8s.io/klog/v2" utilnet "k8s.io/utils/net" @@ -32,6 +34,7 @@ import ( ovncnitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/controller" egressipv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/generator/udn" @@ -44,6 +47,7 @@ import ( addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" egresssvc "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/egressservice" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/udnenabledsvc" + ovnretry "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/retry" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/syncmap" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -192,7 +196,12 @@ type EgressIPController struct { // value will be true if local to this zone and false otherwise nodeZoneState *syncmap.SyncMap[bool] // networkManager used for getting network information for UDNs - networkManager networkmanager.Interface + networkManager networkmanager.Interface + nadReconciler networkmanager.NADReconciler + 
nadReconcilerID uint64 + nadReconcilerRegistered bool + // retryEgressIPPods allows requeuing egressIP pod processing on NAD changes + retryEgressIPPods *ovnretry.RetryFramework // An address set factory that creates address sets addressSetFactory addressset.AddressSetFactory // Northbound database zone name to which this Controller is connected to - aka local zone @@ -233,6 +242,16 @@ func NewEIPController( v4: v4, v6: v6, } + nadReconcilerConfig := &controller.ReconcilerConfig{ + RateLimiter: workqueue.DefaultTypedControllerRateLimiter[string](), + Reconcile: e.syncNAD, + Threadiness: 1, + MaxAttempts: controller.InfiniteAttempts, + } + e.nadReconciler = controller.NewReconciler( + controllerName+"-egressip-nad", + nadReconcilerConfig, + ) return e } @@ -362,19 +381,25 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e for _, namespace := range namespaces { namespaceLabels := labels.Set(namespace.Labels) if !newNamespaceSelector.Matches(namespaceLabels) && oldNamespaceSelector.Matches(namespaceLabels) { - ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) + ni, ok, err := e.resolveActiveNetworkForNamespaceOnLocalNode(namespace.Name) if err != nil { return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) } + if !ok { + continue + } if err := e.deleteNamespaceEgressIPAssignment(ni, oldEIP.Name, oldEIP.Status.Items, namespace, oldEIP.Spec.PodSelector); err != nil { return fmt.Errorf("network %s: failed to delete namespace %s egress IP config: %v", ni.GetNetworkName(), namespace.Name, err) } } if newNamespaceSelector.Matches(namespaceLabels) && !oldNamespaceSelector.Matches(namespaceLabels) { - ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) + ni, ok, err := e.resolveActiveNetworkForNamespaceOnLocalNode(namespace.Name) if err != nil { return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) } + if !ok { + continue + } 
if err := e.addNamespaceEgressIPAssignments(ni, newEIP.Name, newEIP.Status.Items, mark, namespace, newEIP.Spec.PodSelector); err != nil { errs = append(errs, fmt.Errorf("network %s: failed to add namespace %s egress IP config: %v", ni.GetNetworkName(), namespace.Name, err)) } @@ -397,19 +422,25 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e for _, pod := range pods { podLabels := labels.Set(pod.Labels) if !newPodSelector.Matches(podLabels) && oldPodSelector.Matches(podLabels) { - ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) + ni, ok, err := e.resolveActiveNetworkForNamespaceOnLocalNode(namespace.Name) if err != nil { return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) } + if !ok { + continue + } if err := e.deletePodEgressIPAssignmentsWithCleanup(ni, oldEIP.Name, oldEIP.Status.Items, pod); err != nil { return fmt.Errorf("network %s: failed to delete pod %s/%s egress IP config: %v", ni.GetNetworkName(), pod.Namespace, pod.Name, err) } } if newPodSelector.Matches(podLabels) && !oldPodSelector.Matches(podLabels) { - ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) + ni, ok, err := e.resolveActiveNetworkForNamespaceOnLocalNode(namespace.Name) if err != nil { return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) } + if !ok { + continue + } if err := e.addPodEgressIPAssignmentsWithLock(ni, newEIP.Name, newEIP.Status.Items, mark, pod); err != nil { errs = append(errs, fmt.Errorf("network %s: failed to add pod %s/%s egress IP config: %v", ni.GetNetworkName(), pod.Namespace, pod.Name, err)) } @@ -429,10 +460,13 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e namespaceLabels := labels.Set(namespace.Labels) // If the namespace does not match anymore then there's no // reason to look at the pod selector. 
- ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) + ni, ok, err := e.resolveActiveNetworkForNamespaceOnLocalNode(namespace.Name) if err != nil { return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) } + if !ok { + continue + } if !newNamespaceSelector.Matches(namespaceLabels) && oldNamespaceSelector.Matches(namespaceLabels) { if err := e.deleteNamespaceEgressIPAssignment(ni, oldEIP.Name, oldEIP.Status.Items, namespace, oldEIP.Spec.PodSelector); err != nil { return fmt.Errorf("network %s: failed to delete namespace %s egress IP config: %v", ni.GetNetworkName(), namespace.Name, err) @@ -536,10 +570,13 @@ func (e *EgressIPController) reconcileEgressIPNamespace(old, new *corev1.Namespa return err } if namespaceSelector.Matches(oldLabels) && !namespaceSelector.Matches(newLabels) { - ni, err := e.networkManager.GetActiveNetworkForNamespace(namespaceName) + ni, ok, err := e.resolveActiveNetworkForNamespaceOnLocalNode(namespaceName) if err != nil { return fmt.Errorf("failed to get active network for namespace %s: %w", namespaceName, err) } + if !ok { + return nil + } if err := e.deleteNamespaceEgressIPAssignment(ni, eIP.Name, eIP.Status.Items, oldNamespace, eIP.Spec.PodSelector); err != nil { return fmt.Errorf("network %s: failed to delete namespace %q for egress IP %q: %w", ni.GetNetworkName(), namespaceName, eIP.Name, err) @@ -547,10 +584,13 @@ func (e *EgressIPController) reconcileEgressIPNamespace(old, new *corev1.Namespa } if !namespaceSelector.Matches(oldLabels) && namespaceSelector.Matches(newLabels) { mark := getEgressIPPktMark(eIP.Name, eIP.Annotations) - ni, err := e.networkManager.GetActiveNetworkForNamespace(namespaceName) + ni, ok, err := e.resolveActiveNetworkForNamespaceOnLocalNode(namespaceName) if err != nil { return fmt.Errorf("failed to get active network for namespace %s: %v", namespaceName, err) } + if !ok { + return nil + } if err := e.addNamespaceEgressIPAssignments(ni, eIP.Name, 
eIP.Status.Items, mark, newNamespace, eIP.Spec.PodSelector); err != nil { return fmt.Errorf("network %s: failed to add namespace %q for egress IP %q: %w", ni.GetNetworkName(), namespaceName, eIP.Name, err) @@ -653,15 +693,13 @@ func (e *EgressIPController) reconcileEgressIPPod(old, new *corev1.Pod) (err err if err != nil { return err } - ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) - if err != nil { - return fmt.Errorf("failed to get active network for namespace %s: %w", namespace.Name, err) - } + oldMatches, newMatches := false, false + deletePath := false if !podSelector.Empty() { // Use "new" and "old" instead of "newPod" and "oldPod" to determine whether // pods was created or is being deleted. - newMatches := new != nil && podSelector.Matches(newPodLabels) - oldMatches := old != nil && podSelector.Matches(oldPodLabels) + newMatches = new != nil && podSelector.Matches(newPodLabels) + oldMatches = old != nil && podSelector.Matches(oldPodLabels) // If the podSelector doesn't match the pod, then continue // because this EgressIP intends to match other pods in that // namespace and not this one. Other EgressIP objects might @@ -671,7 +709,31 @@ func (e *EgressIPController) reconcileEgressIPPod(old, new *corev1.Pod) (err err } // Check if the pod stopped matching. If the pod was deleted, // "new" will be nil, so this must account for that case. - if !newMatches && oldMatches { + deletePath = !newMatches && oldMatches + } else { + // Empty pod selector means all pods in namespace are matched. + deletePath = new == nil + } + + ni, ok, err := e.resolveActiveNetworkForNamespaceOnLocalNode(namespace.Name) + if err != nil { + return fmt.Errorf("failed to get active network for namespace %s: %w", namespace.Name, err) + } + if !ok && deletePath && old != nil { + // During dynamic UDN churn, active network resolution can transiently return !ok on delete. + // Fall back to the pod-assignment cache network to avoid skipping stale egressIP cleanup. 
+ if cachedNetwork := e.getNetworkFromPodAssignment(getPodKey(oldPod)); cachedNetwork != nil { + ni = cachedNetwork + ok = true + klog.V(4).Infof("Using cached network %q for egressIP delete reconciliation of pod %s/%s", + ni.GetNetworkName(), oldPod.Namespace, oldPod.Name) + } + } + if !ok { + return nil + } + if !podSelector.Empty() { + if deletePath { if err := e.deletePodEgressIPAssignmentsWithCleanup(ni, eIP.Name, eIP.Status.Items, oldPod); err != nil { return fmt.Errorf("network %s: failed to delete pod %s/%s for egress IP %q: %w", ni.GetNetworkName(), oldPod.Namespace, oldPod.Name, eIP.Name, err) @@ -725,10 +787,13 @@ func (e *EgressIPController) addEgressIPAssignments(name string, statusAssignmen } var errs []error for _, namespace := range namespaces { - ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) + ni, ok, err := e.resolveActiveNetworkForNamespaceOnLocalNode(namespace.Name) if err != nil { return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) } + if !ok { + continue + } if err := e.addNamespaceEgressIPAssignments(ni, name, statusAssignments, mark, namespace, podSelector); err != nil { errs = append(errs, err) } @@ -1306,6 +1371,102 @@ func (e *EgressIPController) getALocalZoneNodeName() (string, error) { return "", fmt.Errorf("failed to find a local OVN zone Node") } +// resolveActiveNetworkForNamespaceOnLocalNode returns the active network for the namespace +// only if the network is active on the local zone node. It returns (nil, false, nil) when +// the network is not active on this zone or the namespace/UDN is not yet processed. 
+func (e *EgressIPController) resolveActiveNetworkForNamespaceOnLocalNode(namespace string) (util.NetInfo, bool, error) { + localNodeName, err := e.getALocalZoneNodeName() + if err != nil { + klog.V(5).Infof("No local zone node found while resolving network for namespace %q: %v", namespace, err) + return nil, false, nil + } + + netInfo, ok, err := networkmanager.ResolveActiveNetworkForNamespaceOnNode(e.networkManager, localNodeName, namespace) + if err != nil { + if util.IsInvalidPrimaryNetworkError(err) || util.IsUnprocessedActiveNetworkError(err) { + return nil, false, nil + } + return nil, false, err + } + return netInfo, ok, nil +} + +func (e *EgressIPController) StartNADReconciler() error { + if e.networkManager == nil || e.nadReconciler == nil { + return nil + } + if !e.nadReconcilerRegistered { + id, err := e.networkManager.RegisterNADReconciler(e.nadReconciler) + if err != nil { + return err + } + e.nadReconcilerID = id + e.nadReconcilerRegistered = true + } + return controller.Start(e.nadReconciler) +} + +func (e *EgressIPController) StopNADReconciler() { + if e.nadReconcilerRegistered { + if err := e.networkManager.DeRegisterNADReconciler(e.nadReconcilerID); err != nil { + klog.Warningf("Failed to deregister egress IP NAD reconciler: %v", err) + } + e.nadReconcilerRegistered = false + } + controller.Stop(e.nadReconciler) + e.nadReconcilerID = 0 + e.nadReconciler = nil +} + +func (e *EgressIPController) syncNAD(key string) error { + startTime := time.Now() + klog.V(5).Infof("Egress IP NAD reconcile %s", key) + defer func() { + klog.V(4).Infof("Finished syncing Egress IP for NAD %s, took %v", key, time.Since(startTime)) + }() + + namespace, _, err := cache.SplitMetaNamespaceKey(key) + if err != nil { + klog.Errorf("Failed splitting NAD key %s: %v", key, err) + return nil + } + + ni := e.networkManager.GetNetInfoForNADKey(key) + if ni == nil { + return nil + } + // Only reconcile for primary network NADs. Secondary NADs are irrelevant for EgressIP. 
+ if !ni.IsPrimaryNetwork() { + return nil + } + // Ensure egressIP pods for this namespace are retried after NAD processing so + // we don't miss the UDN IPs if pod updates raced the NAD event. + e.addEgressIPPodRetriesForNamespace(namespace) + return nil +} + +func (e *EgressIPController) addEgressIPPodRetriesForNamespace(namespace string) { + if e.retryEgressIPPods == nil { + return + } + pods, err := e.watchFactory.GetPods(namespace) + if err != nil { + klog.Warningf("Failed to list pods for EgressIP NAD retry in namespace %s: %v", namespace, err) + return + } + for _, pod := range pods { + pod := *pod + if util.PodCompleted(&pod) { + continue + } + klog.V(5).Infof("Adding egress IP pod %s/%s for immediate retry due to NAD change", pod.Name, pod.Namespace) + if err := e.retryEgressIPPods.AddRetryObjWithAddNoBackoff(&pod); err != nil { + klog.Warningf("Failed to add pod %s/%s to egressIP retry queue: %v", pod.Namespace, pod.Name, err) + } + } + e.retryEgressIPPods.RequestRetryObjs() +} + func (e *EgressIPController) syncStaleAddressSetIPs(egressIPCache egressIPCache) error { for _, networkPodCache := range egressIPCache.egressIPNameToPods { for networkName, podCache := range networkPodCache { @@ -1902,11 +2063,15 @@ func (e *EgressIPController) generateCacheForEgressIP() (egressIPCache, error) { cache.networkToRouter = map[string]string{} // build a map of networks -> nodes -> redirect IP for _, namespace := range namespaces { - ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) + ni, ok, err := e.resolveActiveNetworkForNamespaceOnLocalNode(namespace.Name) if err != nil { klog.Errorf("Failed to get active network for namespace %s, stale objects may remain: %v", namespace.Name, err) continue } + if !ok { + klog.V(5).Infof("Skipping namespace %s while building egress IP cache: network not active on local zone", namespace.Name) + continue + } // skip if already processed if _, ok := redirectCache[ni.GetNetworkName()]; ok { continue @@ -2054,12 
+2219,16 @@ func (e *EgressIPController) generateCacheForEgressIP() (egressIPCache, error) { klog.Errorf("Error building egress IP sync cache, cannot retrieve pods for namespace: %s and egress IP: %s, err: %v", namespace.Name, egressIP.Name, err) continue } - ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) + ni, ok, err := e.resolveActiveNetworkForNamespaceOnLocalNode(namespace.Name) if err != nil { klog.Errorf("Failed to get active network for namespace %s, skipping sync: %v", namespace.Name, err) continue } - _, ok := egressIPsCache[egressIP.Name][ni.GetNetworkName()] + if !ok { + klog.V(5).Infof("Skipping namespace %s while building egress IP sync cache: network not active on local zone", namespace.Name) + continue + } + _, ok = egressIPsCache[egressIP.Name][ni.GetNetworkName()] if ok { continue // aready populated } From a114896d8010d1cfc754498caa5efb0d62205ec3 Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Fri, 6 Feb 2026 10:18:46 -0500 Subject: [PATCH 15/59] E2E: Add integration UDN test Adds a test that creates a primary + secondary UDN, pod, egress IP, KNP, MNP objects in those UDNs. Then restarts every ovnkube-pod, and ensures it comes back up in ready state. This is useful in general to make sure we survive restarts correctly, but especially useful for Dynamic UDN where a network may not be active on a node and we want to ensure start up syncing is not failing because of that.
Signed-off-by: Tim Rozet --- test/e2e/network_segmentation_integration.go | 304 +++++++++++++++++++ 1 file changed, 304 insertions(+) create mode 100644 test/e2e/network_segmentation_integration.go diff --git a/test/e2e/network_segmentation_integration.go b/test/e2e/network_segmentation_integration.go new file mode 100644 index 0000000000..f2563980bb --- /dev/null +++ b/test/e2e/network_segmentation_integration.go @@ -0,0 +1,304 @@ +package e2e + +import ( + "context" + "encoding/json" + "fmt" + "strings" + "time" + + mnpapi "github.com/k8snetworkplumbingwg/multi-networkpolicy/pkg/apis/k8s.cni.cncf.io/v1beta1" + mnpclient "github.com/k8snetworkplumbingwg/multi-networkpolicy/pkg/client/clientset/versioned/typed/k8s.cni.cncf.io/v1beta1" + nadapi "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/ovn-org/ovn-kubernetes/test/e2e/deploymentconfig" + "github.com/ovn-org/ovn-kubernetes/test/e2e/feature" + "github.com/ovn-org/ovn-kubernetes/test/e2e/ipalloc" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/apimachinery/pkg/util/wait" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/kubernetes/test/e2e/framework" + e2ekubectl "k8s.io/kubernetes/test/e2e/framework/kubectl" + e2enode "k8s.io/kubernetes/test/e2e/framework/node" +) + +var _ = Describe("Network Segmentation: integration", feature.NetworkSegmentation, func() { + f := wrappedTestFramework("network-segmentation-integration") + f.SkipNamespaceCreation = true + + var cs clientset.Interface + + BeforeEach(func() { + cs = f.ClientSet + namespace, err := f.CreateNamespace(context.TODO(), f.BaseName, map[string]string{ + "e2e-framework": f.BaseName, + RequiredUDNNamespaceLabel: "", + }) + f.Namespace = namespace + Expect(err).NotTo(HaveOccurred()) + }) + + It("should recover ovnkube pods after restart with primary and 
secondary UDN resources", func() { + const ( + primaryUDNName = "primary-udn" + secondaryUDNName = "secondary-udn" + egressIPName = "udn-egressip" + udnPodName = "udn-egress-pod" + udnServiceName = "udn-service" + serviceTargetPort = 80 + nodeHostnameKey = "kubernetes.io/hostname" + egressPodLabelKey = "udn-egress-pod" + egressPodLabelVal = "enabled" + egressNSLabelKey = "udn-egress-namespace" + egressNSLabelValue = "enabled" + ) + DeferCleanup(func() { + e2ekubectl.RunKubectlOrDie("", "delete", "eip", egressIPName, "--ignore-not-found=true") + }) + + primaryNamespace := f.Namespace.Name + + By("creating a primary UDN and waiting until it is ready") + cleanupPrimaryUDN, err := createManifest(primaryNamespace, newPrimaryUserDefinedNetworkManifest(cs, primaryUDNName)) + Expect(err).NotTo(HaveOccurred()) + defer cleanupPrimaryUDN() + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, primaryNamespace, primaryUDNName), 30*time.Second, time.Second).Should(Succeed()) + + By("creating a secondary UDN and waiting until it is ready") + cleanupSecondaryUDN, err := createManifest(primaryNamespace, newL2SecondaryUDNManifest(secondaryUDNName)) + Expect(err).NotTo(HaveOccurred()) + defer cleanupSecondaryUDN() + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, primaryNamespace, secondaryUDNName), 30*time.Second, time.Second).Should(Succeed()) + + By("labeling the primary namespace so it matches the EgressIP namespace selector") + primaryNSObj, err := cs.CoreV1().Namespaces().Get(context.Background(), primaryNamespace, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + if primaryNSObj.Labels == nil { + primaryNSObj.Labels = map[string]string{} + } + primaryNSObj.Labels[egressNSLabelKey] = egressNSLabelValue + _, err = cs.CoreV1().Namespaces().Update(context.Background(), primaryNSObj, metav1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + By("selecting one schedulable node for both pod placement and EgressIP assignment") + nodes, err := 
e2enode.GetBoundedReadySchedulableNodes(context.TODO(), cs, 1) + Expect(err).NotTo(HaveOccurred()) + Expect(nodes.Items).NotTo(BeEmpty()) + targetNode := nodes.Items[0].Name + + By(fmt.Sprintf("labeling node %s as egress assignable", targetNode)) + labelNodeForEgress(f, targetNode) + DeferCleanup(func() { + e2ekubectl.RunKubectlOrDie("default", "label", "node", targetNode, "k8s.ovn.org/egress-assignable-") + }) + + By("creating an EgressIP object selected by the primary UDN namespace and pod label") + var egressIP string + if isIPv4Supported(cs) { + egressIPv4, allocErr := ipalloc.NewPrimaryIPv4() + Expect(allocErr).NotTo(HaveOccurred()) + egressIP = egressIPv4.String() + } else { + egressIPv6, allocErr := ipalloc.NewPrimaryIPv6() + Expect(allocErr).NotTo(HaveOccurred()) + egressIP = egressIPv6.String() + } + cleanupEIP, err := createManifest("", createEIPManifest( + egressIPName, + map[string]string{egressPodLabelKey: egressPodLabelVal}, + map[string]string{egressNSLabelKey: egressNSLabelValue}, + egressIP, + )) + Expect(err).NotTo(HaveOccurred()) + defer cleanupEIP() + + By("creating a pod, service and network policy in the primary UDN namespace") + udnPodCfg := *podConfig( + udnPodName, + withCommand(func() []string { + return httpServerContainerCmd(serviceTargetPort) + }), + withLabels(map[string]string{egressPodLabelKey: egressPodLabelVal}), + withNodeSelector(map[string]string{nodeHostnameKey: targetNode}), + withNetworkAttachment([]nadapi.NetworkSelectionElement{ + {Name: secondaryUDNName}, + }), + ) + udnPodCfg.namespace = primaryNamespace + udnPod := runUDNPod(cs, primaryNamespace, udnPodCfg, nil) + Expect(udnPod).NotTo(BeNil()) + var secondaryAttachmentStatus []nadapi.NetworkStatus + Eventually(func() ([]nadapi.NetworkStatus, error) { + udnPod, err = cs.CoreV1().Pods(primaryNamespace).Get(context.Background(), udnPod.Name, metav1.GetOptions{}) + if err != nil { + return nil, err + } + secondaryAttachmentStatus, err = podNetworkStatus(udnPod, func(status 
nadapi.NetworkStatus) bool { + return status.Name == namespacedName(primaryNamespace, secondaryUDNName) + }) + return secondaryAttachmentStatus, err + }, 30*time.Second, time.Second).Should(HaveLen(1)) + + By("ensuring EgressIP is assigned to the same node as the pod") + Expect(waitForEgressIPAssignedNode(egressIPName, targetNode)).To(Succeed()) + + By("creating a multi network policy for the secondary UDN") + mnpCli, err := mnpclient.NewForConfig(f.ClientConfig()) + Expect(err).NotTo(HaveOccurred()) + const secondaryUDNMNPName = "secondary-udn-default-deny" + secondaryUDNMNP := &mnpapi.MultiNetworkPolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: secondaryUDNMNPName, + Annotations: map[string]string{ + PolicyForAnnotation: secondaryUDNName, + }, + }, + Spec: mnpapi.MultiNetworkPolicySpec{ + PodSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{egressPodLabelKey: egressPodLabelVal}, + }, + PolicyTypes: []mnpapi.MultiPolicyType{ + mnpapi.PolicyTypeIngress, + mnpapi.PolicyTypeEgress, + }, + }, + } + _, err = mnpCli.MultiNetworkPolicies(primaryNamespace).Create(context.Background(), secondaryUDNMNP, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + DeferCleanup(func() { + _ = mnpCli.MultiNetworkPolicies(primaryNamespace).Delete(context.Background(), secondaryUDNMNPName, metav1.DeleteOptions{}) + }) + + _, err = cs.CoreV1().Services(primaryNamespace).Create(context.Background(), &v1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: udnServiceName, + }, + Spec: v1.ServiceSpec{ + Selector: map[string]string{egressPodLabelKey: egressPodLabelVal}, + Ports: []v1.ServicePort{ + { + Name: "http", + Port: serviceTargetPort, + Protocol: v1.ProtocolTCP, + TargetPort: intstr.FromInt(serviceTargetPort), + }, + }, + }, + }, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + _, err = makeDenyAllPolicy(f, primaryNamespace, "deny-all") + Expect(err).NotTo(HaveOccurred()) + + By("restarting each ovnkube pod and ensuring all pods recover without 
crash loops") + Expect(restartAllOVNKubePodsAndAssertHealthy(f)).To(Succeed()) + }) +}) + +func restartAllOVNKubePodsAndAssertHealthy(f *framework.Framework) error { + ovnNamespace := deploymentconfig.Get().OVNKubernetesNamespace() + pods, err := f.ClientSet.CoreV1().Pods(ovnNamespace).List(context.Background(), metav1.ListOptions{}) + if err != nil { + return fmt.Errorf("failed to list ovnkube pods in namespace %s: %w", ovnNamespace, err) + } + + restartedPods := 0 + for i := range pods.Items { + pod := pods.Items[i] + if !strings.HasPrefix(pod.Name, "ovnkube-") || pod.Status.Phase != v1.PodRunning { + continue + } + restartedPods++ + framework.Logf("restarting ovnkube pod %s/%s", pod.Namespace, pod.Name) + if err := deletePodWithWait(context.Background(), f.ClientSet, &pod); err != nil { + return fmt.Errorf("failed restarting ovnkube pod %s/%s: %w", pod.Namespace, pod.Name, err) + } + } + if restartedPods == 0 { + return fmt.Errorf("no running ovnkube pods found in namespace %s", ovnNamespace) + } + + if err := waitOVNKubernetesHealthy(f); err != nil { + return fmt.Errorf("ovn-kubernetes did not become healthy after restarting %d pods: %w", restartedPods, err) + } + + return wait.PollImmediate(2*time.Second, 2*time.Minute, func() (bool, error) { + if err := assertOVNKubePodsReadyAndNotCrashLooping(f.ClientSet, ovnNamespace); err != nil { + framework.Logf("ovnkube pod readiness/crashloop check still failing: %v", err) + return false, nil + } + return true, nil + }) +} + +func assertOVNKubePodsReadyAndNotCrashLooping(cs clientset.Interface, namespace string) error { + pods, err := cs.CoreV1().Pods(namespace).List(context.Background(), metav1.ListOptions{}) + if err != nil { + return fmt.Errorf("failed listing ovnkube pods: %w", err) + } + + found := 0 + for _, pod := range pods.Items { + if !strings.HasPrefix(pod.Name, "ovnkube-") { + continue + } + found++ + if pod.Status.Phase != v1.PodRunning { + return fmt.Errorf("pod %s is not running (phase=%s)", pod.Name, 
pod.Status.Phase) + } + + ready := false + for _, condition := range pod.Status.Conditions { + if condition.Type == v1.PodReady && condition.Status == v1.ConditionTrue { + ready = true + break + } + } + if !ready { + return fmt.Errorf("pod %s is not ready", pod.Name) + } + + for _, status := range append(pod.Status.InitContainerStatuses, pod.Status.ContainerStatuses...) { + if status.State.Waiting != nil && status.State.Waiting.Reason == "CrashLoopBackOff" { + return fmt.Errorf("pod %s container %s is in CrashLoopBackOff", pod.Name, status.Name) + } + } + } + + if found == 0 { + return fmt.Errorf("no ovnkube pods found in namespace %s", namespace) + } + return nil +} + +func waitForEgressIPAssignedNode(egressIPName, nodeName string) error { + return wait.PollImmediate(2*time.Second, 2*time.Minute, func() (bool, error) { + egressIPStdout, err := e2ekubectl.RunKubectl("", "get", "eip", egressIPName, "-o", "json") + if err != nil { + framework.Logf("failed to fetch EgressIP %s status: %v", egressIPName, err) + return false, nil + } + + var eip egressIP + if err := json.Unmarshal([]byte(egressIPStdout), &eip); err != nil { + return false, fmt.Errorf("failed to unmarshal EgressIP %s status: %w", egressIPName, err) + } + + if len(eip.Status.Items) == 0 { + framework.Logf("EgressIP %s has no status items yet", egressIPName) + return false, nil + } + + for _, status := range eip.Status.Items { + if status.Node == nodeName { + return true, nil + } + } + framework.Logf("EgressIP %s not assigned to node %s yet (statuses: %+v)", egressIPName, nodeName, eip.Status.Items) + return false, nil + }) +} From 90e65d62ad8adafa68a2aa3af0ace3ff5c9092f3 Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Sat, 7 Feb 2026 11:39:31 -0500 Subject: [PATCH 16/59] egressip: force pod assignment reprogram when pod IP changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a pod is recreated with the same name, the egressIP cache could already contain a 
“served” {EgressIP,Node} status and skip programming as a no-op. Since statusMap keys do not include pod IP, LRP/NAT state could remain stale and traffic would miss egressIP SNAT. Fix by detecting pod IP drift from podAssignment.podIPs and forcing a delete+add reprogram for already-applied statuses: - compare cached pod IPs to current pod IPs - queue existing statuses for reprogram on IP change - delete old assignment state (without standby promotion) and re-add it - then update cached pod IPs Signed-off-by: Tim Rozet --- go-controller/pkg/ovn/egressip.go | 37 +++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index 2fa91d3114..83f4d9e6b3 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -854,7 +854,7 @@ func (e *EgressIPController) addPodEgressIPAssignments(ni util.NetInfo, name str if len(statusAssignments) == 0 { return nil } - var remainingAssignments, staleAssignments []egressipv1.EgressIPStatusItem + var remainingAssignments, staleAssignments, reprogramAssignments []egressipv1.EgressIPStatusItem nadKey, err := e.getPodNADKeyForNetwork(ni, pod) if err != nil { return err @@ -881,6 +881,7 @@ func (e *EgressIPController) addPodEgressIPAssignments(ni util.NetInfo, name str network: ni, } } else if podState.egressIPName == name || podState.egressIPName == "" { + podIPsChanged := !podIPSliceEqual(podState.podIPs, podIPs) // We do the setup only if this egressIP object is the one serving this pod OR // podState.egressIPName can be empty if no re-routes were found in // syncPodAssignmentCache for the existing pod, we will treat this case as a new add @@ -889,6 +890,10 @@ func (e *EgressIPController) addPodEgressIPAssignments(ni util.NetInfo, name str // (meaning it was populated during EIP sync and needs to be processed for the pod). 
if value, exists := podState.egressStatuses.statusMap[status]; !exists || value == egressStatusStatePending { remainingAssignments = append(remainingAssignments, status) + } else if podIPsChanged { + // A pod can be re-created with the same name but a different IP. + // Force a delete+add for existing statuses so LRP match/NAT gets updated. + reprogramAssignments = append(reprogramAssignments, status) } // Detect stale EIP status entries (same EgressIP reassigned to a different node) // and queue the outdated entry for cleanup. @@ -896,7 +901,6 @@ func (e *EgressIPController) addPodEgressIPAssignments(ni util.NetInfo, name str staleAssignments = append(staleAssignments, *staleStatus) } } - podState.podIPs = podIPs podState.egressIPName = name podState.network = ni podState.standbyEgressIPNames.Delete(name) @@ -930,6 +934,18 @@ func (e *EgressIPController) addPodEgressIPAssignments(ni util.NetInfo, name str } delete(podState.egressStatuses.statusMap, staleStatus) } + if len(reprogramAssignments) > 0 { + klog.V(2).Infof("Pod %s IPs changed, forcing egress IP status reprogram for statuses: %+v", podKey, reprogramAssignments) + if err := e.deletePodEgressIPAssignments(ni, name, reprogramAssignments, pod, false); err != nil { + return fmt.Errorf("failed to force reprogram of pod %s statuses %v for egress IP %s: %w", + podKey, reprogramAssignments, name, err) + } + for _, status := range reprogramAssignments { + delete(podState.egressStatuses.statusMap, status) + } + remainingAssignments = append(remainingAssignments, reprogramAssignments...) + } + podState.podIPs = podIPs // We store podState into podAssignment cache at this place for two reasons. // 1. When podAssignmentState is newly created. // 2. deletePodEgressIPAssignments might clean the podAssignment cache, make sure we add it back. 
@@ -2501,6 +2517,23 @@ func (e egressStatuses) delete(deleteStatus egressipv1.EgressIPStatusItem) { delete(e.statusMap, deleteStatus) } +func podIPSliceEqual(oldIPs, newIPs []net.IP) bool { + if len(oldIPs) != len(newIPs) { + return false + } + oldIPStrings := make([]string, 0, len(oldIPs)) + for _, podIP := range oldIPs { + oldIPStrings = append(oldIPStrings, podIP.String()) + } + newIPStrings := make([]string, 0, len(newIPs)) + for _, podIP := range newIPs { + newIPStrings = append(newIPStrings, podIP.String()) + } + sort.Strings(oldIPStrings) + sort.Strings(newIPStrings) + return slices.Equal(oldIPStrings, newIPStrings) +} + // podAssignmentState keeps track of which egressIP object is serving // the related pod. // NOTE: At a given time only one object will be configured. This is From 67dfeafeba950261b440d27925ab7c739fd86834 Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Sat, 7 Feb 2026 12:03:12 -0500 Subject: [PATCH 17/59] egressip: trigger immediate retry when UDN pod port cache is populated EgressIP pod handling assumes pod networking setup has already populated logicalPortCache before egressIP reconciliation runs. That ordering holds within one controller queue, but breaks for primary UDNs where pod setup runs in UDN controllers while egressIP pod reconcile runs in the default controller. In that cross-controller race, egressIP reconcile can run first, fail to get pod IPs (stale/missing LSP), and wait for normal retry cadence even after UDN later updates port cache. Fix by wiring an immediate egressIP pod retry on logicalPortCache add: - add a base controller callback hook for logicalPortCache add events - invoke it from default/UDN pod logical port add paths - hook it for primary UDN controllers to enqueue no-backoff egressIP pod retry - centralize retry logic in eIPController.addEgressIPPodRetry() (including PodNeedsSNAT filtering) This preserves existing behavior while removing the UDN/DNC ordering race window for egressIP pod programming. 
Signed-off-by: Tim Rozet --- go-controller/pkg/ovn/base_network_controller.go | 3 +++ .../ovn/base_network_controller_user_defined.go | 3 +++ go-controller/pkg/ovn/egressip.go | 16 ++++++++++++---- .../layer2_user_defined_network_controller.go | 5 +++++ .../layer3_user_defined_network_controller.go | 5 +++++ go-controller/pkg/ovn/pods.go | 3 +++ 6 files changed, 31 insertions(+), 4 deletions(-) diff --git a/go-controller/pkg/ovn/base_network_controller.go b/go-controller/pkg/ovn/base_network_controller.go index 7656e41ebc..bb8f5e413e 100644 --- a/go-controller/pkg/ovn/base_network_controller.go +++ b/go-controller/pkg/ovn/base_network_controller.go @@ -124,6 +124,9 @@ type BaseNetworkController struct { // A cache of all logical ports known to the controller logicalPortCache *PortCache + // optional callback for consumers that need to react when a pod's logical + // port info is inserted/refreshed in logicalPortCache. + onLogicalPortCacheAdd func(pod *corev1.Pod, nadKey string) // Info about known namespaces. 
You must use oc.getNamespaceLocked() or // oc.waitForNamespaceLocked() to read this map, and oc.createNamespaceLocked() diff --git a/go-controller/pkg/ovn/base_network_controller_user_defined.go b/go-controller/pkg/ovn/base_network_controller_user_defined.go index 238daee738..55df33ce13 100644 --- a/go-controller/pkg/ovn/base_network_controller_user_defined.go +++ b/go-controller/pkg/ovn/base_network_controller_user_defined.go @@ -422,6 +422,9 @@ func (bsnc *BaseUserDefinedNetworkController) addLogicalPortToNetworkForNAD(pod if lsp != nil { _ = bsnc.logicalPortCache.add(pod, switchName, nadKey, lsp.UUID, podAnnotation.MAC, podAnnotation.IPs) + if bsnc.onLogicalPortCacheAdd != nil { + bsnc.onLogicalPortCacheAdd(pod, nadKey) + } if bsnc.requireDHCP(pod) { if err := bsnc.ensureDHCP(pod, podAnnotation, lsp); err != nil { return err diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index 83f4d9e6b3..779e07939f 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -1475,10 +1475,18 @@ func (e *EgressIPController) addEgressIPPodRetriesForNamespace(namespace string) if util.PodCompleted(&pod) { continue } - klog.V(5).Infof("Adding egress IP pod %s/%s for immediate retry due to NAD change", pod.Name, pod.Namespace) - if err := e.retryEgressIPPods.AddRetryObjWithAddNoBackoff(&pod); err != nil { - klog.Warningf("Failed to add pod %s/%s to egressIP retry queue: %v", pod.Namespace, pod.Name, err) - } + e.addEgressIPPodRetry(&pod, "NAD change") + } +} + +func (e *EgressIPController) addEgressIPPodRetry(pod *corev1.Pod, reason string) { + if e.retryEgressIPPods == nil || pod == nil || util.PodCompleted(pod) || !util.PodNeedsSNAT(pod) { + return + } + klog.V(5).Infof("Adding egress IP pod %s/%s for immediate retry due to %s", pod.Namespace, pod.Name, reason) + if err := e.retryEgressIPPods.AddRetryObjWithAddNoBackoff(pod); err != nil { + klog.Warningf("Failed to add pod %s/%s to egressIP retry queue: %v", 
pod.Namespace, pod.Name, err) + return } e.retryEgressIPPods.RequestRetryObjs() } diff --git a/go-controller/pkg/ovn/layer2_user_defined_network_controller.go b/go-controller/pkg/ovn/layer2_user_defined_network_controller.go index 63f4994cfa..f47b0c14a6 100644 --- a/go-controller/pkg/ovn/layer2_user_defined_network_controller.go +++ b/go-controller/pkg/ovn/layer2_user_defined_network_controller.go @@ -398,6 +398,11 @@ func NewLayer2UserDefinedNetworkController( eIPController: eIPController, remoteNodesNoRouter: sync.Map{}, } + if oc.IsPrimaryNetwork() && oc.eIPController != nil { + oc.onLogicalPortCacheAdd = func(pod *corev1.Pod, _ string) { + oc.eIPController.addEgressIPPodRetry(pod, "logical port cache update") + } + } if config.OVNKubernetesFeature.EnableInterconnect { oc.zoneICHandler = zoneinterconnect.NewZoneInterconnectHandler(oc.GetNetInfo(), oc.nbClient, oc.sbClient, oc.watchFactory) diff --git a/go-controller/pkg/ovn/layer3_user_defined_network_controller.go b/go-controller/pkg/ovn/layer3_user_defined_network_controller.go index 24ccf96a5f..57059e296c 100644 --- a/go-controller/pkg/ovn/layer3_user_defined_network_controller.go +++ b/go-controller/pkg/ovn/layer3_user_defined_network_controller.go @@ -385,6 +385,11 @@ func NewLayer3UserDefinedNetworkController( gatewayManagers: sync.Map{}, eIPController: eIPController, } + if oc.IsPrimaryNetwork() && oc.eIPController != nil { + oc.onLogicalPortCacheAdd = func(pod *corev1.Pod, _ string) { + oc.eIPController.addEgressIPPodRetry(pod, "logical port cache update") + } + } if config.OVNKubernetesFeature.EnableInterconnect { oc.zoneICHandler = zoneic.NewZoneInterconnectHandler(oc.GetNetInfo(), cnci.nbClient, cnci.sbClient, cnci.watchFactory) diff --git a/go-controller/pkg/ovn/pods.go b/go-controller/pkg/ovn/pods.go index e877cb9af6..e43a2cb31f 100644 --- a/go-controller/pkg/ovn/pods.go +++ b/go-controller/pkg/ovn/pods.go @@ -379,6 +379,9 @@ func (oc *DefaultNetworkController) addLogicalPort(pod *corev1.Pod) (err 
error) // Add the pod's logical switch port to the port cache _ = oc.logicalPortCache.add(pod, switchName, types.DefaultNetworkName, lsp.UUID, podAnnotation.MAC, podAnnotation.IPs) + if oc.onLogicalPortCacheAdd != nil { + oc.onLogicalPortCacheAdd(pod, types.DefaultNetworkName) + } if kubevirt.IsPodLiveMigratable(pod) { if err := oc.ensureDHCP(pod, podAnnotation, lsp); err != nil { From 162404919cabadc88bde226d9265abf29741699f Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Tue, 10 Feb 2026 16:51:07 -0500 Subject: [PATCH 18/59] GetActiveNetworkForNamespace Refactor Removes UnprocessedActiveNetwork Error, and moves to just using a single error, InvalidPrimaryNetworkError for everything. Modifies GetActiveNetworkForNamespace to return nil when there is no active network due to namespace being removed, or Dynamic UDN filtering. Callers can then rely on this function to determine whether or not a network is active versus the network should exist but doesn't (an error). Walked through all callers of GetActiveNetworkForNamespace and GetPrimaryNADForNamespace and tried to simplify number of calls and logic. 
Signed-off-by: Tim Rozet --- .../endpointslice_mirror_controller.go | 2 +- .../networkconnect/cluster_network_connect.go | 8 +- .../networkconnect/controller.go | 9 +- .../pkg/clustermanager/pod/allocator.go | 10 +- go-controller/pkg/cni/udn/primary_network.go | 4 + go-controller/pkg/networkmanager/api.go | 59 ++------ .../pkg/networkmanager/egressip_tracker.go | 4 +- .../pkg/networkmanager/nad_controller.go | 58 +++----- .../pkg/networkmanager/pod_tracker.go | 2 +- .../node/base_node_network_controller_dpu.go | 17 +-- .../pkg/node/controllers/egressip/egressip.go | 8 ++ go-controller/pkg/node/gateway_init.go | 2 +- go-controller/pkg/node/gateway_shared_intf.go | 98 +++++--------- .../pkg/node/gateway_shared_intf_test.go | 36 ++--- .../pkg/ovn/base_network_controller.go | 18 ++- .../base_network_controller_user_defined.go | 29 ++-- .../egressfirewall/egressfirewall.go | 28 ++-- .../services/services_controller.go | 2 +- go-controller/pkg/ovn/egressip.go | 126 ++++++++++-------- go-controller/pkg/util/util.go | 21 --- 20 files changed, 231 insertions(+), 310 deletions(-) diff --git a/go-controller/pkg/clustermanager/endpointslicemirror/endpointslice_mirror_controller.go b/go-controller/pkg/clustermanager/endpointslicemirror/endpointslice_mirror_controller.go index 66f95f1e83..6e97f0a7d0 100644 --- a/go-controller/pkg/clustermanager/endpointslicemirror/endpointslice_mirror_controller.go +++ b/go-controller/pkg/clustermanager/endpointslicemirror/endpointslice_mirror_controller.go @@ -252,7 +252,7 @@ func (c *Controller) syncDefaultEndpointSlice(ctx context.Context, key string) e return err } - if namespacePrimaryNetwork.IsDefault() || !namespacePrimaryNetwork.IsPrimaryNetwork() { + if namespacePrimaryNetwork == nil || namespacePrimaryNetwork.IsDefault() || !namespacePrimaryNetwork.IsPrimaryNetwork() { return nil } diff --git a/go-controller/pkg/clustermanager/networkconnect/cluster_network_connect.go 
b/go-controller/pkg/clustermanager/networkconnect/cluster_network_connect.go index 89e0eb2b8b..a48036e6ab 100644 --- a/go-controller/pkg/clustermanager/networkconnect/cluster_network_connect.go +++ b/go-controller/pkg/clustermanager/networkconnect/cluster_network_connect.go @@ -38,8 +38,8 @@ var ( func getPrimaryNADForNamespace(networkMgr networkmanager.Interface, namespaceName string, nadLister nadlisters.NetworkAttachmentDefinitionLister) (nadKey string, network util.NetInfo, err error) { namespacePrimaryNetwork, err := networkMgr.GetActiveNetworkForNamespace(namespaceName) if err != nil { - if util.IsInvalidPrimaryNetworkError(err) || util.IsUnprocessedActiveNetworkError(err) { - // We intentionally ignore the unprocessed active network error because + if util.IsInvalidPrimaryNetworkError(err) { + // We intentionally ignore the invalid primary network error because // UDN Controller hasn't created the NAD yet, OR NAD doesn't exist in a // namespace that has the required UDN label. It could also be that the // UDN was deleted and the NAD is also gone. 
@@ -47,13 +47,13 @@ func getPrimaryNADForNamespace(networkMgr networkmanager.Interface, namespaceNam } return "", nil, err } - if namespacePrimaryNetwork.IsDefault() { + if namespacePrimaryNetwork == nil || namespacePrimaryNetwork.IsDefault() { // No primary UDN in this namespace return "", nil, nil } primaryNADKey, err := networkMgr.GetPrimaryNADForNamespace(namespaceName) if err != nil { - if util.IsInvalidPrimaryNetworkError(err) || util.IsUnprocessedActiveNetworkError(err) { + if util.IsInvalidPrimaryNetworkError(err) { return "", nil, nil } return "", nil, err diff --git a/go-controller/pkg/clustermanager/networkconnect/controller.go b/go-controller/pkg/clustermanager/networkconnect/controller.go index 8bec1787ee..654f69d33d 100644 --- a/go-controller/pkg/clustermanager/networkconnect/controller.go +++ b/go-controller/pkg/clustermanager/networkconnect/controller.go @@ -407,16 +407,19 @@ func (c *Controller) mustProcessCNCForNAD(nad *nadv1.NetworkAttachmentDefinition continue } for _, namespace := range namespaces { - primaryNAD, err := c.networkManager.GetActiveNetworkForNamespace(namespace.Name) + nsPrimaryNetwork, err := c.networkManager.GetActiveNetworkForNamespace(namespace.Name) if err != nil { - if util.IsUnprocessedActiveNetworkError(err) || util.IsInvalidPrimaryNetworkError(err) { + if util.IsInvalidPrimaryNetworkError(err) { continue } klog.Errorf("Failed to get active network for namespace %s: %v", namespace.Name, err) continue } + if nsPrimaryNetwork == nil { + continue + } networkName := c.networkManager.GetNetworkNameForNADKey(nadKey) - if networkName != "" && networkName == primaryNAD.GetNetworkName() { + if networkName != "" && networkName == nsPrimaryNetwork.GetNetworkName() { isSelected = true break selectorLoop } diff --git a/go-controller/pkg/clustermanager/pod/allocator.go b/go-controller/pkg/clustermanager/pod/allocator.go index 5e5e65f25d..ab377aa759 100644 --- a/go-controller/pkg/clustermanager/pod/allocator.go +++ 
b/go-controller/pkg/clustermanager/pod/allocator.go @@ -113,11 +113,17 @@ func (a *PodAllocator) Init() error { func (a *PodAllocator) getActiveNetworkForPod(pod *corev1.Pod) (util.NetInfo, error) { activeNetwork, err := a.networkManager.GetActiveNetworkForNamespace(pod.Namespace) if err != nil { - if util.IsUnprocessedActiveNetworkError(err) { + if util.IsInvalidPrimaryNetworkError(err) { a.recordPodErrorEvent(pod, err) } return nil, err } + // Cluster manager pod allocation should always have an active network + if activeNetwork == nil { + newErr := fmt.Errorf("no active network found for pod %s/%s", pod.Namespace, pod.Name) + a.recordPodErrorEvent(pod, newErr) + return nil, newErr + } return activeNetwork, nil } @@ -131,7 +137,7 @@ func (a *PodAllocator) GetNetworkRole(pod *corev1.Pod) (string, error) { pod, ) if err != nil { - if util.IsUnprocessedActiveNetworkError(err) { + if util.IsInvalidPrimaryNetworkError(err) { a.recordPodErrorEvent(pod, err) } return "", err diff --git a/go-controller/pkg/cni/udn/primary_network.go b/go-controller/pkg/cni/udn/primary_network.go index c751a6cbda..8ac6fecfc6 100644 --- a/go-controller/pkg/cni/udn/primary_network.go +++ b/go-controller/pkg/cni/udn/primary_network.go @@ -152,6 +152,10 @@ func (p *UserDefinedPrimaryNetwork) ensureActiveNetwork(namespace string) error if err != nil { return err } + // CNI should always have an active network for a pod on our node + if activeNetwork == nil { + return fmt.Errorf("no active network found for namespace %s", namespace) + } if activeNetwork.IsDefault() { return fmt.Errorf("missing primary user defined network NAD for namespace '%s'", namespace) } diff --git a/go-controller/pkg/networkmanager/api.go b/go-controller/pkg/networkmanager/api.go index a8940ec87d..1581afcfa2 100644 --- a/go-controller/pkg/networkmanager/api.go +++ b/go-controller/pkg/networkmanager/api.go @@ -3,7 +3,6 @@ package networkmanager import ( "context" "errors" - "fmt" nadinformers 
"github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/informers/externalversions/k8s.cni.cncf.io/v1" @@ -44,11 +43,13 @@ type watchFactory interface { // information to the rest of the project. type Interface interface { // GetActiveNetworkForNamespace returns a copy of the primary network for - // the namespace if any or the default network otherwise. If there is a - // primary UDN defined but the NAD has not been processed yet, returns - // ErrNetworkControllerTopologyNotManaged. Used for controllers that are not - // capable of reconciling primary network changes. If unsure, use this one - // and not GetActiveNetworkForNamespaceFast. + // the namespace if any or the default network otherwise. + // If the network is non-existent for a legitimate reason (namespace gone or + // filtered by Dynamic UDN) it returns nil NetInfo and no error. + // If the network is non-existent, but should exist, return InvalidPrimaryNetworkError. + // If unsure, use this one and not GetActiveNetworkForNamespaceFast. + // Note this function is filtered by Dynamic UDN, so if your caller wants NAD/Network + // information without D-UDN filtering, use GetPrimaryNADForNamespace. GetActiveNetworkForNamespace(namespace string) (util.NetInfo, error) // GetActiveNetworkForNamespaceFast returns the primary network for the @@ -62,6 +63,7 @@ type Interface interface { // GetPrimaryNADForNamespace returns the full namespaced key of the // primary NAD for the given namespace, if one exists. // Returns default network if namespace has no primary UDN. + // This function is not filtered based on Dynamic UDN. GetPrimaryNADForNamespace(namespace string) (string, error) // GetNetwork returns the network of the given name or nil if unknown @@ -107,51 +109,6 @@ type Controller interface { Stop() } -// ResolveActiveNetworkForNamespaceOnNode returns the active primary network for the namespace only if -// the network is active on the given node. 
It uses the NAD cache to resolve the namespace's primary -// NAD and network name, checks NodeHasNetwork, and then calls GetActiveNetworkForNamespace to fetch -// the rendered NetInfo. It returns (nil, false, nil) when the network is not active on the node. -func ResolveActiveNetworkForNamespaceOnNode(nm Interface, nodeName, namespace string) (util.NetInfo, bool, error) { - if nm == nil { - return nil, false, fmt.Errorf("network manager is nil") - } - - nadKey, err := nm.GetPrimaryNADForNamespace(namespace) - if err != nil { - return nil, false, err - } - if nadKey == "" { - // Namespace is gone - return nil, false, nil - } - - if nadKey == types.DefaultNetworkName { - netInfo, err := nm.GetActiveNetworkForNamespace(namespace) - if err != nil { - return nil, false, err - } - return netInfo, true, nil - } - - networkName := nm.GetNetworkNameForNADKey(nadKey) - if networkName == "" { - return nil, false, fmt.Errorf("no primary network found for namespace %s", namespace) - } - - if !nm.NodeHasNetwork(nodeName, networkName) { - return nil, false, nil - } - - // At this point the namespace's primary NAD is known and the network is active on this node, - // so GetActiveNetworkForNamespace should not normally return InvalidPrimaryNetworkError. - // Any error here is treated as transient/inconsistent state. - netInfo, err := nm.GetActiveNetworkForNamespace(namespace) - if err != nil { - return nil, false, err - } - return netInfo, true, nil -} - // Default returns a default implementation that assumes the default network is // the only ever existing network. Used when multi-network capabilities are not // enabled or testing. 
diff --git a/go-controller/pkg/networkmanager/egressip_tracker.go b/go-controller/pkg/networkmanager/egressip_tracker.go index be941b9bcd..9b86bb2cce 100644 --- a/go-controller/pkg/networkmanager/egressip_tracker.go +++ b/go-controller/pkg/networkmanager/egressip_tracker.go @@ -250,7 +250,7 @@ func (t *EgressIPTrackerController) reconcileNamespace(key string) error { primaryNAD, err := t.primaryNADForNamespace(ns.Name) if err != nil { - if util.IsUnprocessedActiveNetworkError(err) { + if util.IsInvalidPrimaryNetworkError(err) { // Namespace requires a primary network but none exists yet; NAD controller will requeue. return nil } @@ -380,5 +380,5 @@ func (t *EgressIPTrackerController) getPrimaryNADForNamespaceFromLister(namespac } // The namespace declared it needs a primary UDN but none exists yet. - return "", util.NewUnprocessedActiveNetworkError(namespace, "") + return "", util.NewInvalidPrimaryNetworkError(namespace) } diff --git a/go-controller/pkg/networkmanager/nad_controller.go b/go-controller/pkg/networkmanager/nad_controller.go index 63335beea0..29a2099c5c 100644 --- a/go-controller/pkg/networkmanager/nad_controller.go +++ b/go-controller/pkg/networkmanager/nad_controller.go @@ -31,7 +31,6 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/errors" - utiludn "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/udn" ) // nadController handles namespaced scoped NAD events and @@ -839,6 +838,10 @@ func (c *nadController) nadNeedsUpdate(oldNAD, newNAD *nettypes.NetworkAttachmen oldNAD.Annotations[types.OvnNetworkNameAnnotation] != newNAD.Annotations[types.OvnNetworkNameAnnotation] } +// GetActiveNetworkForNamespace attempts to get the netInfo of a primary active network where this OVNK instance is running. +// Returns DefaultNetwork if Network Segmentation disabled or namespace does not require primary UDN. 
+// Returns nil if there is no active network. +// Returns InvalidPrimaryNetworkError if a network should be present but is not. func (c *nadController) GetActiveNetworkForNamespace(namespace string) (util.NetInfo, error) { if !util.IsNetworkSegmentationSupportEnabled() { return &util.DefaultNetInfo{}, nil @@ -847,6 +850,10 @@ func (c *nadController) GetActiveNetworkForNamespace(namespace string) (util.Net // check if required UDN label is on namespace ns, err := c.namespaceLister.Get(namespace) if err != nil { + if apierrors.IsNotFound(err) { + // namespace is gone, no active network for it + return nil, nil + } return nil, fmt.Errorf("failed to get namespace %q: %w", namespace, err) } if _, exists := ns.Labels[types.RequiredUDNNamespaceLabel]; !exists { @@ -854,49 +861,26 @@ func (c *nadController) GetActiveNetworkForNamespace(namespace string) (util.Net return &util.DefaultNetInfo{}, nil } - network, nad := c.getActiveNetworkForNamespace(namespace) + // primary UDN territory, check if our NAD controller to see if it has processed the network and if the + // network manager has rendered the network + network, primaryNAD := c.getActiveNetworkForNamespace(namespace) if network != nil && network.IsPrimaryNetwork() { - // primary UDN found + // primary UDN network found in network controller copy := util.NewMutableNetInfo(network) - copy.SetNADs(nad) + copy.SetNADs(primaryNAD) return copy, nil } - // no primary UDN found, make sure we just haven't processed it yet and no UDN / CUDN exists - udns, err := c.udnLister.UserDefinedNetworks(namespace).List(labels.Everything()) - if err != nil { - return nil, fmt.Errorf("error getting user defined networks: %w", err) - } - for _, udn := range udns { - if utiludn.IsPrimaryNetwork(&udn.Spec) { - return nil, util.NewUnprocessedActiveNetworkError(namespace, udn.Name) - } - } - cudns, err := c.cudnLister.List(labels.Everything()) - if err != nil { - return nil, fmt.Errorf("failed to list CUDNs: %w", err) - } - for _, cudn := 
range cudns { - if !utiludn.IsPrimaryNetwork(&cudn.Spec.Network) { - continue - } - // check the subject namespace referred by the specified namespace-selector - cudnNamespaceSelector, err := metav1.LabelSelectorAsSelector(&cudn.Spec.NamespaceSelector) - if err != nil { - return nil, fmt.Errorf("failed to convert CUDN %q namespaceSelector: %w", cudn.Name, err) - } - selectedNamespaces, err := c.namespaceLister.List(cudnNamespaceSelector) - if err != nil { - return nil, fmt.Errorf("failed to list namespaces using selector %q: %w", cudnNamespaceSelector, err) - } - for _, ns := range selectedNamespaces { - if ns.Name == namespace { - return nil, util.NewUnprocessedActiveNetworkError(namespace, cudn.Name) - } + // no network exists in the network manager + if primaryNAD != "" { + if config.OVNKubernetesFeature.EnableDynamicUDNAllocation { + // primary NAD exists, no network, and DUDN is enabled, treat this like the network doesn't exist + return nil, nil } + // primary NAD exists, but missing in network manager. This should never happen. + panic(fmt.Sprintf("NAD Controller broken consistency with Network Manager for primary NAD: %s", primaryNAD)) } - // namespace has required UDN label, but no UDN was found return nil, util.NewInvalidPrimaryNetworkError(namespace) } @@ -930,7 +914,7 @@ func (c *nadController) GetPrimaryNADForNamespace(namespace string) (string, err } if _, exists := ns.Labels[types.RequiredUDNNamespaceLabel]; exists { // Namespace promises a primary UDN, but we haven't cached one yet. - return "", util.NewUnprocessedActiveNetworkError(namespace, "") + return "", util.NewInvalidPrimaryNetworkError(namespace) } // No required label: means default network only. 
diff --git a/go-controller/pkg/networkmanager/pod_tracker.go b/go-controller/pkg/networkmanager/pod_tracker.go index 4a300dd099..3a682d41ed 100644 --- a/go-controller/pkg/networkmanager/pod_tracker.go +++ b/go-controller/pkg/networkmanager/pod_tracker.go @@ -191,7 +191,7 @@ func (c *PodTrackerController) getPrimaryNADForNamespaceFromLister(namespace str return util.GetNADName(nad.Namespace, nad.Name), nil } } - return "", util.NewUnprocessedActiveNetworkError(namespace, "") + return "", util.NewInvalidPrimaryNetworkError(namespace) } // syncAll builds the cache on initial controller start diff --git a/go-controller/pkg/node/base_node_network_controller_dpu.go b/go-controller/pkg/node/base_node_network_controller_dpu.go index db79e35c39..cdcbae6c2f 100644 --- a/go-controller/pkg/node/base_node_network_controller_dpu.go +++ b/go-controller/pkg/node/base_node_network_controller_dpu.go @@ -104,6 +104,7 @@ func (bnnc *BaseNodeNetworkController) watchPodsDPU() (*factory.Handler, error) return bnnc.watchFactory.AddPodHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(obj interface{}) { var activeNetwork util.NetInfo + var err error pod := obj.(*corev1.Pod) klog.V(5).Infof("Add for Pod: %s/%s for network %s", pod.Namespace, pod.Name, netName) @@ -116,22 +117,16 @@ func (bnnc *BaseNodeNetworkController) watchPodsDPU() (*factory.Handler, error) nadToDPUCDMap := map[string]*util.DPUConnectionDetails{} if bnnc.IsUserDefinedNetwork() { if bnnc.IsPrimaryNetwork() { - // check to see if the primary NAD is even applicable to our controller - foundNamespaceNAD, err := bnnc.networkManager.GetPrimaryNADForNamespace(pod.Namespace) + activeNetwork, err = bnnc.networkManager.GetActiveNetworkForNamespace(pod.Namespace) if err != nil { - klog.Errorf("Failed to get primary network NAD for namespace %s: %v", pod.Namespace, err) - return - } - if foundNamespaceNAD == types.DefaultNetworkName { + klog.Errorf("Failed looking for the active network for namespace %s: %v", pod.Namespace, err) 
return } - networkName := bnnc.networkManager.GetNetworkNameForNADKey(foundNamespaceNAD) - if networkName != "" && networkName != netName { + if activeNetwork == nil { + klog.Errorf("Unable to find an active network for namespace %s", pod.Namespace) return } - activeNetwork, err = bnnc.networkManager.GetActiveNetworkForNamespace(pod.Namespace) - if err != nil { - klog.Errorf("Failed looking for the active network for namespace %s: %v", pod.Namespace, err) + if activeNetwork.GetNetworkName() != netName { return } } diff --git a/go-controller/pkg/node/controllers/egressip/egressip.go b/go-controller/pkg/node/controllers/egressip/egressip.go index 08726875a3..508f3ef57d 100644 --- a/go-controller/pkg/node/controllers/egressip/egressip.go +++ b/go-controller/pkg/node/controllers/egressip/egressip.go @@ -567,6 +567,10 @@ func (c *Controller) processEIP(eip *eipv1.EgressIP) (*eIPConfig, sets.Set[strin if err != nil { return nil, selectedNamespaces, selectedPods, selectedNamespacesPodIPs, fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) } + if netInfo == nil { + // no active network + continue + } if netInfo.IsUserDefinedNetwork() { // EIP for secondary host interfaces is not supported for secondary networks continue @@ -1036,6 +1040,10 @@ func (c *Controller) repairNode() error { if err != nil { return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) } + if netInfo == nil { + // no active network + continue + } if netInfo.IsUserDefinedNetwork() { // EIP for secondary host interfaces is not supported for secondary networks continue diff --git a/go-controller/pkg/node/gateway_init.go b/go-controller/pkg/node/gateway_init.go index 75d704a64a..f0eb9094d6 100644 --- a/go-controller/pkg/node/gateway_init.go +++ b/go-controller/pkg/node/gateway_init.go @@ -507,7 +507,7 @@ func (nc *DefaultNodeNetworkController) initGatewayDPUHost() error { return fmt.Errorf("unable to configure UDN nftables: %w", err) } } - 
gw.nodePortWatcherIptables = newNodePortWatcherIptables(nc.name, nc.networkManager) + gw.nodePortWatcherIptables = newNodePortWatcherIptables(nc.networkManager) gw.loadBalancerHealthChecker = newLoadBalancerHealthChecker(nc.name, nc.watchFactory) portClaimWatcher, err := newPortClaimWatcher(nc.recorder) if err != nil { diff --git a/go-controller/pkg/node/gateway_shared_intf.go b/go-controller/pkg/node/gateway_shared_intf.go index 33d922acdb..cab65ad87b 100644 --- a/go-controller/pkg/node/gateway_shared_intf.go +++ b/go-controller/pkg/node/gateway_shared_intf.go @@ -176,13 +176,11 @@ func configureUDNServicesNFTables() error { // nodePortWatcherIptables manages iptables rules for shared gateway // to ensure that services using NodePorts are accessible. type nodePortWatcherIptables struct { - nodeName string networkManager networkmanager.Interface } -func newNodePortWatcherIptables(nodeName string, networkManager networkmanager.Interface) *nodePortWatcherIptables { +func newNodePortWatcherIptables(networkManager networkmanager.Interface) *nodePortWatcherIptables { return &nodePortWatcherIptables{ - nodeName: nodeName, networkManager: networkManager, } } @@ -191,7 +189,6 @@ func newNodePortWatcherIptables(nodeName string, networkManager networkmanager.I // to ensure that services using NodePorts are accessible type nodePortWatcher struct { dpuMode bool - nodeName string gatewayIPv4 string gatewayIPv6 string gatewayIPLock sync.Mutex @@ -892,16 +889,13 @@ func (npw *nodePortWatcher) AddService(service *corev1.Service) error { } klog.V(5).Infof("Adding service %s in namespace %s", service.Name, service.Namespace) - - netInfo, activeOnNode, err := networkmanager.ResolveActiveNetworkForNamespaceOnNode( - npw.networkManager, - npw.nodeName, - service.Namespace, - ) + netInfo, err := npw.networkManager.GetActiveNetworkForNamespace(service.Namespace) if err != nil { return fmt.Errorf("error getting active network for service %s in namespace %s: %w", service.Name, 
service.Namespace, err) } - if !activeOnNode { + + if netInfo == nil { + // network not active on our node return nil } @@ -982,15 +976,12 @@ func (npw *nodePortWatcher) UpdateService(old, new *corev1.Service) error { if util.ServiceTypeHasClusterIP(new) && util.IsClusterIPSet(new) { klog.V(5).Infof("Adding new service rules for: %v", new) - netInfo, activeOnNode, err := networkmanager.ResolveActiveNetworkForNamespaceOnNode( - npw.networkManager, - npw.nodeName, - new.Namespace, - ) + netInfo, err := npw.networkManager.GetActiveNetworkForNamespace(new.Namespace) if err != nil { return fmt.Errorf("error getting active network for service %s in namespace %s: %w", new.Name, new.Namespace, err) } - if !activeOnNode { + if netInfo == nil { + // network not active on our node return utilerrors.Join(errors...) } @@ -1229,22 +1220,19 @@ func (npw *nodePortWatcher) SyncServices(services []interface{}) error { continue } - netInfo, activeOnNode, err := networkmanager.ResolveActiveNetworkForNamespaceOnNode( - npw.networkManager, - npw.nodeName, - service.Namespace, - ) + netInfo, err := npw.networkManager.GetActiveNetworkForNamespace(service.Namespace) if err != nil { // During startup sync, avoid failing the entire processExisting loop for namespaces that // require a UDN but have no primary NAD yet (or it has been deleted). Those services will // be reconciled later via regular add/update events once the NAD exists. 
- if util.IsUnprocessedActiveNetworkError(err) || util.IsInvalidPrimaryNetworkError(err) { + if util.IsInvalidPrimaryNetworkError(err) { continue } errors = append(errors, err) continue } - if !activeOnNode { + if netInfo == nil { + // network not active on our node continue } @@ -1323,15 +1311,12 @@ func (npw *nodePortWatcher) AddEndpointSlice(epSlice *discovery.EndpointSlice) e var errors []error var svc *corev1.Service - netInfo, activeOnNode, err := networkmanager.ResolveActiveNetworkForNamespaceOnNode( - npw.networkManager, - npw.nodeName, - epSlice.Namespace, - ) + netInfo, err := npw.networkManager.GetActiveNetworkForNamespace(epSlice.Namespace) if err != nil { return fmt.Errorf("error getting active network for endpointslice %s in namespace %s: %w", epSlice.Name, epSlice.Namespace, err) } - if !activeOnNode { + if netInfo == nil { + // network not active on our node return nil } @@ -1450,14 +1435,10 @@ func (npw *nodePortWatcher) DeleteEndpointSlice(epSlice *discovery.EndpointSlice // Get network info after deleting old rules, before adding new ones. // This ensures old rules are cleaned up even if namespace/network is deleted, // and allows graceful handling of deletion race conditions. - netInfo, activeOnNode, err := networkmanager.ResolveActiveNetworkForNamespaceOnNode( - npw.networkManager, - npw.nodeName, - namespacedName.Namespace, - ) + netInfo, err := npw.networkManager.GetActiveNetworkForNamespace(namespacedName.Namespace) if err != nil { // If the UDN was deleted or not processed yet, skip adding new service rules - if util.IsInvalidPrimaryNetworkError(err) || util.IsUnprocessedActiveNetworkError(err) { + if util.IsInvalidPrimaryNetworkError(err) { klog.V(5).Infof("Skipping addServiceRules for %s/%s during endpoint slice delete: primary network unavailable: %v", namespacedName.Namespace, namespacedName.Name, err) return utilerrors.Join(errors...) 
@@ -1465,7 +1446,8 @@ func (npw *nodePortWatcher) DeleteEndpointSlice(epSlice *discovery.EndpointSlice errors = append(errors, fmt.Errorf("error getting active network for service %s/%s: %w", namespacedName.Namespace, namespacedName.Name, err)) return utilerrors.Join(errors...) } - if !activeOnNode { + if netInfo == nil { + // network not active on our node return utilerrors.Join(errors...) } @@ -1504,15 +1486,12 @@ func (npw *nodePortWatcher) UpdateEndpointSlice(oldEpSlice, newEpSlice *discover var err error var errors []error - netInfo, activeOnNode, err := networkmanager.ResolveActiveNetworkForNamespaceOnNode( - npw.networkManager, - npw.nodeName, - newEpSlice.Namespace, - ) + netInfo, err := npw.networkManager.GetActiveNetworkForNamespace(newEpSlice.Namespace) if err != nil { return fmt.Errorf("error getting active network for endpointslice %s in namespace %s: %w", newEpSlice.Name, newEpSlice.Namespace, err) } - if !activeOnNode { + if netInfo == nil { + // network not active on our node return nil } @@ -1599,15 +1578,12 @@ func (npwipt *nodePortWatcherIptables) AddService(service *corev1.Service) error return nil } - netInfo, activeOnNode, err := networkmanager.ResolveActiveNetworkForNamespaceOnNode( - npwipt.networkManager, - npwipt.nodeName, - service.Namespace, - ) + netInfo, err := npwipt.networkManager.GetActiveNetworkForNamespace(service.Namespace) if err != nil { return fmt.Errorf("error getting active network for service %s in namespace %s: %w", service.Name, service.Namespace, err) } - if !activeOnNode { + if netInfo == nil { + // network not active on our node return nil } @@ -1634,15 +1610,12 @@ func (npwipt *nodePortWatcherIptables) UpdateService(old, new *corev1.Service) e } if util.ServiceTypeHasClusterIP(new) && util.IsClusterIPSet(new) { - netInfo, activeOnNode, err := networkmanager.ResolveActiveNetworkForNamespaceOnNode( - npwipt.networkManager, - npwipt.nodeName, - new.Namespace, - ) + netInfo, err := 
npwipt.networkManager.GetActiveNetworkForNamespace(new.Namespace) if err != nil { return fmt.Errorf("error getting active network for service %s in namespace %s: %w", new.Name, new.Namespace, err) } - if !activeOnNode { + if netInfo == nil { + // network not active on our node return utilerrors.Join(errors...) } @@ -1685,22 +1658,19 @@ func (npwipt *nodePortWatcherIptables) SyncServices(services []interface{}) erro if !util.ServiceTypeHasClusterIP(service) || !util.IsClusterIPSet(service) { continue } - _, activeOnNode, err := networkmanager.ResolveActiveNetworkForNamespaceOnNode( - npwipt.networkManager, - npwipt.nodeName, - service.Namespace, - ) + netInfo, err := npwipt.networkManager.GetActiveNetworkForNamespace(service.GetNamespace()) if err != nil { // During startup sync, avoid failing the entire processExisting loop for namespaces that // require a UDN but have no primary NAD yet (or it has been deleted). Those services will // be reconciled later via regular add/update events once the NAD exists. - if util.IsUnprocessedActiveNetworkError(err) || util.IsInvalidPrimaryNetworkError(err) { + if util.IsInvalidPrimaryNetworkError(err) { continue } errors = append(errors, err) continue } - if !activeOnNode { + if netInfo == nil { + // network not on our node continue } // Add correct iptables rules. 
@@ -1852,7 +1822,7 @@ func newGateway( if config.Gateway.NodeportEnable { klog.Info("Creating Gateway Node Port Watcher") - gw.nodePortWatcher, err = newNodePortWatcher(nodeName, gwBridge, gw.openflowManager, gw.nodeIPManager, watchFactory, networkManager) + gw.nodePortWatcher, err = newNodePortWatcher(gwBridge, gw.openflowManager, gw.nodeIPManager, watchFactory, networkManager) if err != nil { return err } @@ -1873,7 +1843,6 @@ func newGateway( } func newNodePortWatcher( - nodeName string, gwBridge *bridgeconfig.BridgeConfiguration, ofm *openflowManager, nodeIPManager *addressManager, @@ -1942,7 +1911,6 @@ func newNodePortWatcher( npw := &nodePortWatcher{ dpuMode: dpuMode, - nodeName: nodeName, gatewayIPv4: gatewayIPv4, gatewayIPv6: gatewayIPv6, ofportPhys: ofportPhys, diff --git a/go-controller/pkg/node/gateway_shared_intf_test.go b/go-controller/pkg/node/gateway_shared_intf_test.go index 38cc1efe24..43078f027a 100644 --- a/go-controller/pkg/node/gateway_shared_intf_test.go +++ b/go-controller/pkg/node/gateway_shared_intf_test.go @@ -10,9 +10,7 @@ import ( corev1 "k8s.io/api/core/v1" discovery "k8s.io/api/discovery/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/client-go/kubernetes/fake" @@ -22,6 +20,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/networkmanager" + nodenft "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/nftables" ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -45,9 +44,9 @@ func (m *mockNetworkManagerWithNamespaceNotFoundError) GetPrimaryNADForNamespace return "", nil } -func (m 
*mockNetworkManagerWithNamespaceNotFoundError) GetActiveNetworkForNamespace(namespace string) (util.NetInfo, error) { - notFoundErr := apierrors.NewNotFound(schema.GroupResource{Resource: "namespaces"}, namespace) - return nil, fmt.Errorf("failed to get namespace %q: %w", namespace, notFoundErr) +func (m *mockNetworkManagerWithNamespaceNotFoundError) GetActiveNetworkForNamespace(_ string) (util.NetInfo, error) { + // Namespace is gone; new GetActiveNetworkForNamespace semantics return nil, nil. + return nil, nil } // mockNetworkManagerWithInvalidPrimaryNetworkError simulates UDN deletion scenario @@ -78,14 +77,18 @@ func (m *mockNetworkManagerWithError) GetActiveNetworkForNamespace(namespace str return nil, fmt.Errorf("network lookup failed for namespace %q", namespace) } -// mockNetworkManagerWithUnprocessedActiveNetworkError simulates a namespace that -// requires a UDN but the primary NAD has not been processed yet. -type mockNetworkManagerWithUnprocessedActiveNetworkError struct { +// mockNetworkManagerWithInvalidPrimaryNetworkSkip simulates a namespace that +// requires a primary UDN but is currently in invalid primary network state. +type mockNetworkManagerWithInvalidPrimaryNetworkSkip struct { networkmanager.Interface } -func (m *mockNetworkManagerWithUnprocessedActiveNetworkError) GetPrimaryNADForNamespace(namespace string) (string, error) { - return "", util.NewUnprocessedActiveNetworkError(namespace, "") +func (m *mockNetworkManagerWithInvalidPrimaryNetworkSkip) GetPrimaryNADForNamespace(namespace string) (string, error) { + return "", util.NewInvalidPrimaryNetworkError(namespace) +} + +func (m *mockNetworkManagerWithInvalidPrimaryNetworkSkip) GetActiveNetworkForNamespace(namespace string) (util.NetInfo, error) { + return nil, util.NewInvalidPrimaryNetworkError(namespace) } // mockNetworkManagerWithInactiveNode simulates a UDN where the node is inactive for the network. 
@@ -105,8 +108,10 @@ func (m *mockNetworkManagerWithInactiveNode) NodeHasNetwork(_, _ string) bool { return false } -func (m *mockNetworkManagerWithInactiveNode) GetActiveNetworkForNamespace(namespace string) (util.NetInfo, error) { - return nil, fmt.Errorf("unexpected GetActiveNetworkForNamespace call for %q", namespace) +func (m *mockNetworkManagerWithInactiveNode) GetActiveNetworkForNamespace(_ string) (util.NetInfo, error) { + // New code paths resolve activity directly via GetActiveNetworkForNamespace. + // Returning nil netInfo means "network not active on this node". + return nil, nil } // mockNetworkManagerWithActiveUDN simulates a UDN active on this node. @@ -348,6 +353,7 @@ var _ = Describe("SyncServices", func() { config.Gateway.Mode = config.GatewayModeLocal config.IPv4Mode = true config.IPv6Mode = false + _ = nodenft.SetFakeNFTablesHelper() fakeClient = &util.OVNNodeClientset{ KubeClient: fake.NewSimpleClientset(), @@ -374,7 +380,7 @@ var _ = Describe("SyncServices", func() { watcher.Shutdown() }) - Context("when namespace requires UDN but NAD is unprocessed", func() { + Context("when namespace has invalid primary network", func() { It("should skip service sync without failing startup", func() { service := newService(testService, testNamespace, "10.96.0.20", []corev1.ServicePort{{ @@ -386,13 +392,13 @@ var _ = Describe("SyncServices", func() { }}, corev1.ServiceTypeNodePort, nil, corev1.ServiceStatus{}, false, false) - npw.networkManager = &mockNetworkManagerWithUnprocessedActiveNetworkError{} + npw.networkManager = &mockNetworkManagerWithInvalidPrimaryNetworkSkip{} err := npw.SyncServices([]interface{}{service}) Expect(err).NotTo(HaveOccurred()) verifyIPTablesRule(iptV4, "10.96.0.20", 80, 30091, false, - "iptables rule should not be created when UDN is unprocessed") + "iptables rule should not be created when primary network is invalid") }) }) diff --git a/go-controller/pkg/ovn/base_network_controller.go 
b/go-controller/pkg/ovn/base_network_controller.go index bb8f5e413e..4620b7bb69 100644 --- a/go-controller/pkg/ovn/base_network_controller.go +++ b/go-controller/pkg/ovn/base_network_controller.go @@ -340,12 +340,6 @@ func (oc *BaseUserDefinedNetworkController) shouldFilterNamespace(namespace stri nadKey, err := oc.networkManager.GetPrimaryNADForNamespace(namespace) if err != nil { - if util.IsUnprocessedActiveNetworkError(err) { - return false - } - if util.IsInvalidPrimaryNetworkError(err) { - return true - } return false } if nadKey == types.DefaultNetworkName { @@ -1049,7 +1043,7 @@ func (bnc *BaseNetworkController) GetNetworkRole(pod *corev1.Pod) (string, error pod, ) if err != nil { - if util.IsUnprocessedActiveNetworkError(err) { + if util.IsInvalidPrimaryNetworkError(err) { bnc.recordPodErrorEvent(pod, err) } return "", err @@ -1174,7 +1168,7 @@ func (bnc *BaseNetworkController) AddResourceCommon(objType reflect.Type, obj in if err != nil { // If this is a UDN namespace that hasn't been processed yet, the default // controller should skip it while UDN controllers should retry. - if bnc.GetNetworkName() == types.DefaultNetworkName && util.IsUnprocessedActiveNetworkError(err) { + if bnc.GetNetworkName() == types.DefaultNetworkName && util.IsInvalidPrimaryNetworkError(err) { return nil } // Retry until the NAD controller has processed the primary NAD for this namespace. 
@@ -1194,11 +1188,15 @@ func (bnc *BaseNetworkController) AddResourceCommon(objType reflect.Type, obj in return nil } } - netinfo, err := bnc.networkManager.GetActiveNetworkForNamespace(np.Namespace) + netInfo, err := bnc.networkManager.GetActiveNetworkForNamespace(np.Namespace) if err != nil { return fmt.Errorf("could not get active network for namespace %s: %v", np.Namespace, err) } - if bnc.GetNetworkName() != netinfo.GetNetworkName() { + if netInfo == nil { + // no active network, nothing to do + return nil + } + if bnc.GetNetworkName() != netInfo.GetNetworkName() { return nil } if err := bnc.addNetworkPolicy(np); err != nil { diff --git a/go-controller/pkg/ovn/base_network_controller_user_defined.go b/go-controller/pkg/ovn/base_network_controller_user_defined.go index 55df33ce13..38fae086bd 100644 --- a/go-controller/pkg/ovn/base_network_controller_user_defined.go +++ b/go-controller/pkg/ovn/base_network_controller_user_defined.go @@ -274,7 +274,11 @@ func (bsnc *BaseUserDefinedNetworkController) ensurePodForUserDefinedNetwork(pod } activeNetwork, err = bsnc.networkManager.GetActiveNetworkForNamespace(pod.Namespace) if err != nil { - return fmt.Errorf("failed looking for the active network at namespace '%s': %w", pod.Namespace, err) + return fmt.Errorf("failed to find active network for pod %s/%s: %w", pod.Namespace, pod.Name, err) + } + if activeNetwork == nil { + // no active network, pod doesn't belong to our controller + return nil } } @@ -627,29 +631,18 @@ func (bsnc *BaseUserDefinedNetworkController) syncPodsForUserDefinedNetwork(pods var activeNetwork util.NetInfo var err error if bsnc.IsPrimaryNetwork() { - // check to see if the primary NAD is even applicable to our controller - foundNamespaceNAD, err := bsnc.networkManager.GetPrimaryNADForNamespace(pod.Namespace) + activeNetwork, err = bsnc.networkManager.GetActiveNetworkForNamespace(pod.Namespace) if err != nil { - return fmt.Errorf("failed to get primary network namespace NAD: %w", err) + return 
fmt.Errorf("failed to find the active network for pod %s/%s: %w", pod.Namespace, pod.Name, err) } - if foundNamespaceNAD == types.DefaultNetworkName { + if activeNetwork == nil || activeNetwork.IsDefault() { + // no active network for pod, or is a default network pod continue } - networkName := bsnc.networkManager.GetNetworkNameForNADKey(foundNamespaceNAD) - if networkName != "" && networkName != bsnc.GetNetworkName() { + if activeNetwork.GetNetworkName() != bsnc.GetNetworkName() { + // network name found but doesn't apply to our controller continue } - activeNetwork, err = bsnc.networkManager.GetActiveNetworkForNamespace(pod.Namespace) - if err != nil { - if apierrors.IsNotFound(err) { - // namespace is gone after we listed this pod, that means the pod no longer exists - // we don't need to preserve it's previously allocated IP address or logical switch port - klog.Infof("%s network controller pod sync: pod %s/%s namespace has been deleted, ignoring pod", - bsnc.GetNetworkName(), pod.Namespace, pod.Name) - continue - } - return fmt.Errorf("failed looking for the active network at namespace '%s': %w", pod.Namespace, err) - } } on, networkMap, err := util.GetPodNADToNetworkMappingWithActiveNetwork( diff --git a/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall.go b/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall.go index e3a4009642..af64599de4 100644 --- a/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall.go +++ b/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall.go @@ -422,20 +422,15 @@ func (oc *EFController) sync(key string) (updateErr error) { }() activeNetwork, netErr := oc.networkManager.GetActiveNetworkForNamespace(namespace) - if netErr != nil { - if util.IsUnprocessedActiveNetworkError(netErr) { - klog.V(5).Infof("Skipping egress firewall %s/%s: primary network not ready: %v", namespace, efName, netErr) - skipStatusUpdate = true - return nil - } - if util.IsInvalidPrimaryNetworkError(netErr) { - // 
Namespace requires P-UDN, but it does not exist. Remove EF config and surface error in status. - updateErr = netErr - } else { - return fmt.Errorf("failed to get active network for egress firewall %s/%s namespace: %w", - namespace, efName, netErr) - } - } else { + switch { + case netErr != nil: + // Failed to resolve active network; surface this in EF status. + updateErr = netErr + case activeNetwork == nil: + // No active network for this namespace in this controller context (e.g. filtered by D-UDN): + // cleanup stale EF config but don't report an EF status error. + skipStatusUpdate = true + default: aclLoggingLevels, logErr := oc.getNamespaceACLLogging(namespace) if logErr != nil { return fmt.Errorf("failed to get acl logging levels for egress firewall %s/%s: %w", @@ -645,7 +640,10 @@ func (oc *EFController) validateAndGetEgressFirewallDestination(namespace string } cidrSelector = egressFirewallDestination.CIDRSelector netInfo, err := oc.networkManager.GetActiveNetworkForNamespace(namespace) - if err != nil { + if netInfo == nil || err != nil { + if err == nil { + err = fmt.Errorf("no active network found for namespace %s ", namespace) + } return "", "", nil, nil, fmt.Errorf("failed to validate egress firewall destination: %w", err) } diff --git a/go-controller/pkg/ovn/controller/services/services_controller.go b/go-controller/pkg/ovn/controller/services/services_controller.go index 181e55eeae..428d75324d 100644 --- a/go-controller/pkg/ovn/controller/services/services_controller.go +++ b/go-controller/pkg/ovn/controller/services/services_controller.go @@ -604,7 +604,7 @@ func (c *Controller) skipService(name, namespace string) bool { if err != nil { // If the namespace requires a UDN that hasn't been processed yet, the default controller // should skip this service; the UDN controller will handle it once ready. 
- if util.IsUnprocessedActiveNetworkError(err) { + if util.IsInvalidPrimaryNetworkError(err) { return c.netInfo.IsDefault() } utilruntime.HandleError(fmt.Errorf("failed to retrieve network for service %s/%s: %w", diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index 779e07939f..4c6850e3b2 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -381,11 +381,16 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e for _, namespace := range namespaces { namespaceLabels := labels.Set(namespace.Labels) if !newNamespaceSelector.Matches(namespaceLabels) && oldNamespaceSelector.Matches(namespaceLabels) { - ni, ok, err := e.resolveActiveNetworkForNamespaceOnLocalNode(namespace.Name) + ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) if err != nil { - return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) + if util.IsInvalidPrimaryNetworkError(err) { + // NAD reconciler will notify us later + continue + } + return fmt.Errorf("failed to get active network for namespace %s: %w", namespace.Name, err) } - if !ok { + if ni == nil { + // our node does not have this network continue } if err := e.deleteNamespaceEgressIPAssignment(ni, oldEIP.Name, oldEIP.Status.Items, namespace, oldEIP.Spec.PodSelector); err != nil { @@ -393,11 +398,16 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e } } if newNamespaceSelector.Matches(namespaceLabels) && !oldNamespaceSelector.Matches(namespaceLabels) { - ni, ok, err := e.resolveActiveNetworkForNamespaceOnLocalNode(namespace.Name) + ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) if err != nil { - return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) + if util.IsInvalidPrimaryNetworkError(err) { + // NAD reconciler will notify us later + continue + } + return fmt.Errorf("failed to get active 
network for namespace %s: %w", namespace.Name, err) } - if !ok { + if ni == nil { + // our node does not have this network continue } if err := e.addNamespaceEgressIPAssignments(ni, newEIP.Name, newEIP.Status.Items, mark, namespace, newEIP.Spec.PodSelector); err != nil { @@ -422,11 +432,16 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e for _, pod := range pods { podLabels := labels.Set(pod.Labels) if !newPodSelector.Matches(podLabels) && oldPodSelector.Matches(podLabels) { - ni, ok, err := e.resolveActiveNetworkForNamespaceOnLocalNode(namespace.Name) + ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) if err != nil { - return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) + if util.IsInvalidPrimaryNetworkError(err) { + // NAD reconciler will notify us later + continue + } + return fmt.Errorf("failed to get active network for namespace %s: %w", namespace.Name, err) } - if !ok { + if ni == nil { + // our node does not have this network continue } if err := e.deletePodEgressIPAssignmentsWithCleanup(ni, oldEIP.Name, oldEIP.Status.Items, pod); err != nil { @@ -434,11 +449,16 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e } } if newPodSelector.Matches(podLabels) && !oldPodSelector.Matches(podLabels) { - ni, ok, err := e.resolveActiveNetworkForNamespaceOnLocalNode(namespace.Name) + ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) if err != nil { - return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) + if util.IsInvalidPrimaryNetworkError(err) { + // NAD reconciler will notify us later + continue + } + return fmt.Errorf("failed to get active network for namespace %s: %w", namespace.Name, err) } - if !ok { + if ni == nil { + // our node does not have this network continue } if err := e.addPodEgressIPAssignmentsWithLock(ni, newEIP.Name, newEIP.Status.Items, mark, pod); err != nil 
{ @@ -460,11 +480,16 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e namespaceLabels := labels.Set(namespace.Labels) // If the namespace does not match anymore then there's no // reason to look at the pod selector. - ni, ok, err := e.resolveActiveNetworkForNamespaceOnLocalNode(namespace.Name) + ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) if err != nil { - return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) + if util.IsInvalidPrimaryNetworkError(err) { + // NAD reconciler will notify us later + continue + } + return fmt.Errorf("failed to get active network for namespace %s: %w", namespace.Name, err) } - if !ok { + if ni == nil { + // our node does not have this network continue } if !newNamespaceSelector.Matches(namespaceLabels) && oldNamespaceSelector.Matches(namespaceLabels) { @@ -570,11 +595,16 @@ func (e *EgressIPController) reconcileEgressIPNamespace(old, new *corev1.Namespa return err } if namespaceSelector.Matches(oldLabels) && !namespaceSelector.Matches(newLabels) { - ni, ok, err := e.resolveActiveNetworkForNamespaceOnLocalNode(namespaceName) + ni, err := e.networkManager.GetActiveNetworkForNamespace(namespaceName) if err != nil { + if util.IsInvalidPrimaryNetworkError(err) { + // NAD reconciler will notify us later + return nil + } return fmt.Errorf("failed to get active network for namespace %s: %w", namespaceName, err) } - if !ok { + if ni == nil { + // our node does not have this network return nil } if err := e.deleteNamespaceEgressIPAssignment(ni, eIP.Name, eIP.Status.Items, oldNamespace, eIP.Spec.PodSelector); err != nil { @@ -584,11 +614,16 @@ func (e *EgressIPController) reconcileEgressIPNamespace(old, new *corev1.Namespa } if !namespaceSelector.Matches(oldLabels) && namespaceSelector.Matches(newLabels) { mark := getEgressIPPktMark(eIP.Name, eIP.Annotations) - ni, ok, err := e.resolveActiveNetworkForNamespaceOnLocalNode(namespaceName) + ni, err 
:= e.networkManager.GetActiveNetworkForNamespace(namespaceName) if err != nil { - return fmt.Errorf("failed to get active network for namespace %s: %v", namespaceName, err) + if util.IsInvalidPrimaryNetworkError(err) { + // NAD reconciler will notify us later + return nil + } + return fmt.Errorf("failed to get active network for namespace %s: %w", namespaceName, err) } - if !ok { + if ni == nil { + // our node does not have this network return nil } if err := e.addNamespaceEgressIPAssignments(ni, eIP.Name, eIP.Status.Items, mark, newNamespace, eIP.Spec.PodSelector); err != nil { @@ -715,21 +750,22 @@ func (e *EgressIPController) reconcileEgressIPPod(old, new *corev1.Pod) (err err deletePath = new == nil } - ni, ok, err := e.resolveActiveNetworkForNamespaceOnLocalNode(namespace.Name) - if err != nil { + ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) + if err != nil && !util.IsInvalidPrimaryNetworkError(err) { return fmt.Errorf("failed to get active network for namespace %s: %w", namespace.Name, err) } - if !ok && deletePath && old != nil { + haveNetwork := ni != nil + if !haveNetwork && deletePath && old != nil { // During dynamic UDN churn, active network resolution can transiently return !ok on delete. // Fall back to the pod-assignment cache network to avoid skipping stale egressIP cleanup. 
if cachedNetwork := e.getNetworkFromPodAssignment(getPodKey(oldPod)); cachedNetwork != nil { ni = cachedNetwork - ok = true + haveNetwork = true klog.V(4).Infof("Using cached network %q for egressIP delete reconciliation of pod %s/%s", ni.GetNetworkName(), oldPod.Namespace, oldPod.Name) } } - if !ok { + if !haveNetwork { return nil } if !podSelector.Empty() { @@ -787,11 +823,14 @@ func (e *EgressIPController) addEgressIPAssignments(name string, statusAssignmen } var errs []error for _, namespace := range namespaces { - ni, ok, err := e.resolveActiveNetworkForNamespaceOnLocalNode(namespace.Name) + ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) if err != nil { + if util.IsInvalidPrimaryNetworkError(err) { + continue + } return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) } - if !ok { + if ni == nil { continue } if err := e.addNamespaceEgressIPAssignments(ni, name, statusAssignments, mark, namespace, podSelector); err != nil { @@ -1387,26 +1426,6 @@ func (e *EgressIPController) getALocalZoneNodeName() (string, error) { return "", fmt.Errorf("failed to find a local OVN zone Node") } -// resolveActiveNetworkForNamespaceOnLocalNode returns the active network for the namespace -// only if the network is active on the local zone node. It returns (nil, false, nil) when -// the network is not active on this zone or the namespace/UDN is not yet processed. 
-func (e *EgressIPController) resolveActiveNetworkForNamespaceOnLocalNode(namespace string) (util.NetInfo, bool, error) { - localNodeName, err := e.getALocalZoneNodeName() - if err != nil { - klog.V(5).Infof("No local zone node found while resolving network for namespace %q: %v", namespace, err) - return nil, false, nil - } - - netInfo, ok, err := networkmanager.ResolveActiveNetworkForNamespaceOnNode(e.networkManager, localNodeName, namespace) - if err != nil { - if util.IsInvalidPrimaryNetworkError(err) || util.IsUnprocessedActiveNetworkError(err) { - return nil, false, nil - } - return nil, false, err - } - return netInfo, ok, nil -} - func (e *EgressIPController) StartNADReconciler() error { if e.networkManager == nil || e.nadReconciler == nil { return nil @@ -1423,6 +1442,9 @@ func (e *EgressIPController) StartNADReconciler() error { } func (e *EgressIPController) StopNADReconciler() { + if e.nadReconciler == nil { + return + } if e.nadReconcilerRegistered { if err := e.networkManager.DeRegisterNADReconciler(e.nadReconcilerID); err != nil { klog.Warningf("Failed to deregister egress IP NAD reconciler: %v", err) @@ -2087,12 +2109,12 @@ func (e *EgressIPController) generateCacheForEgressIP() (egressIPCache, error) { cache.networkToRouter = map[string]string{} // build a map of networks -> nodes -> redirect IP for _, namespace := range namespaces { - ni, ok, err := e.resolveActiveNetworkForNamespaceOnLocalNode(namespace.Name) + ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) if err != nil { klog.Errorf("Failed to get active network for namespace %s, stale objects may remain: %v", namespace.Name, err) continue } - if !ok { + if ni == nil { klog.V(5).Infof("Skipping namespace %s while building egress IP cache: network not active on local zone", namespace.Name) continue } @@ -2243,16 +2265,16 @@ func (e *EgressIPController) generateCacheForEgressIP() (egressIPCache, error) { klog.Errorf("Error building egress IP sync cache, cannot retrieve 
pods for namespace: %s and egress IP: %s, err: %v", namespace.Name, egressIP.Name, err) continue } - ni, ok, err := e.resolveActiveNetworkForNamespaceOnLocalNode(namespace.Name) + ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) if err != nil { klog.Errorf("Failed to get active network for namespace %s, skipping sync: %v", namespace.Name, err) continue } - if !ok { + if ni == nil { klog.V(5).Infof("Skipping namespace %s while building egress IP sync cache: network not active on local zone", namespace.Name) continue } - _, ok = egressIPsCache[egressIP.Name][ni.GetNetworkName()] + _, ok := egressIPsCache[egressIP.Name][ni.GetNetworkName()] if ok { continue // aready populated } diff --git a/go-controller/pkg/util/util.go b/go-controller/pkg/util/util.go index 6299177a70..266d05aaaf 100644 --- a/go-controller/pkg/util/util.go +++ b/go-controller/pkg/util/util.go @@ -366,27 +366,6 @@ func IsClusterIP(svcVIP string) bool { return false } -// UnprocessedActiveNetworkError indicates that the primary UDN is required and exists -// for a namespace, but our network manager has not processed it yet. 
-type UnprocessedActiveNetworkError struct { - namespace string - udnName string -} - -func (m *UnprocessedActiveNetworkError) Error() string { - return fmt.Sprintf("primary UDN %q exists in namespace %s, but NAD has not been processed yet", - m.udnName, m.namespace) -} - -func IsUnprocessedActiveNetworkError(err error) bool { - var unprocessedActiveNetworkError *UnprocessedActiveNetworkError - return errors.As(err, &unprocessedActiveNetworkError) -} - -func NewUnprocessedActiveNetworkError(namespace, udnName string) *UnprocessedActiveNetworkError { - return &UnprocessedActiveNetworkError{namespace: namespace, udnName: udnName} -} - // InvalidPrimaryNetworkError indicates that the namespace requires a primary UDN, but no primary UDN exists yet type InvalidPrimaryNetworkError struct { namespace string From a760e77bd89ef3c492faf762c76ffc50b445532d Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Fri, 13 Feb 2026 11:35:33 -0500 Subject: [PATCH 19/59] Improve EgressFW subnet parsing - Removes a second call to GetActiveNetworkForNamespace during egress firewall add. We can just use the cache object that already exists. - Restructure the cache object to be a slice of subnets, rather than a string key. - Fix util function CopyIPNets, which was not doing a deep copy of the underlying IP/Mask slices. 
Signed-off-by: Tim Rozet --- .../egressfirewall/egress_firewall_test.go | 9 +++- .../egressfirewall/egressfirewall.go | 46 ++++++++---------- .../egressfirewall_sync_test.go | 2 +- go-controller/pkg/util/net.go | 47 ++++++++++++++++++- 4 files changed, 73 insertions(+), 31 deletions(-) diff --git a/go-controller/pkg/ovn/controller/egressfirewall/egress_firewall_test.go b/go-controller/pkg/ovn/controller/egressfirewall/egress_firewall_test.go index fbcacf04b6..94f87f0ff2 100644 --- a/go-controller/pkg/ovn/controller/egressfirewall/egress_firewall_test.go +++ b/go-controller/pkg/ovn/controller/egressfirewall/egress_firewall_test.go @@ -484,7 +484,7 @@ var _ = ginkgo.Describe("OVN test basic functions", func() { subnets = append(subnets, config.CIDRNetworkEntry{CIDR: cidr}) } config.Default.ClusterSubnets = subnets - entry := &cacheEntry{} + entry := &cacheEntry{subnets: subnetsForNetInfo(&util.DefaultNetInfo{})} output, err := efController.newEgressFirewallRule("default", tc.egressFirewallRule, tc.id, entry) if tc.err == true { gomega.Expect(err).To(gomega.HaveOccurred()) @@ -716,8 +716,13 @@ func TestValidateAndGetEgressFirewallDestination(t *testing.T) { if len(tc.udnName) > 0 { network = tc.udnName } + entry := &cacheEntry{subnets: subnetsForNetInfo(&util.DefaultNetInfo{})} + if len(tc.udnName) > 0 { + entry.subnets = subnetsForNetInfo(netInfo) + } + cidrSelector, dnsName, clusterSubnetIntersection, nodeSelector, err := - efController.validateAndGetEgressFirewallDestination(network, tc.egressFirewallDestination) + efController.validateAndGetEgressFirewallDestination(network, tc.egressFirewallDestination, entry) if tc.expectedErr { require.Error(t, err) } else { diff --git a/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall.go b/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall.go index af64599de4..8d9dc37403 100644 --- a/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall.go +++ 
b/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall.go @@ -110,7 +110,7 @@ type matchKind int type cacheEntry struct { pgName string hasNodeSelector bool - subnetsKey string + subnets []*net.IPNet efResourceVersion string logHash string } @@ -439,7 +439,7 @@ func (oc *EFController) sync(key string) (updateErr error) { ownerController := activeNetwork.GetNetworkName() + "-network-controller" newEntry = &cacheEntry{ pgName: libovsdbutil.GetPortGroupName(getNamespacePortGroupDbIDs(namespace, ownerController)), - subnetsKey: subnetsKeyForNetInfo(activeNetwork), + subnets: subnetsForNetInfo(activeNetwork), efResourceVersion: ef.ResourceVersion, logHash: aclLogHash(aclLoggingLevels), } @@ -535,20 +535,19 @@ func (oc *EFController) sync(key string) (updateErr error) { return } -func subnetsKeyForNetInfo(netInfo util.NetInfo) string { +func subnetsForNetInfo(netInfo util.NetInfo) []*net.IPNet { if netInfo == nil { - return "" + return nil } subnets := netInfo.Subnets() - if len(subnets) == 0 { - return "" - } - keys := make([]string, 0, len(subnets)) - for _, s := range subnets { - keys = append(keys, s.String()) + unsortedSubnets := make([]*net.IPNet, 0, len(subnets)) + for _, subnet := range subnets { + if subnet.CIDR == nil { + continue + } + unsortedSubnets = append(unsortedSubnets, subnet.CIDR) } - slices.Sort(keys) - return strings.Join(keys, ",") + return util.CopyIPNets(unsortedSubnets) } func entriesEqual(a, b *cacheEntry) bool { @@ -559,7 +558,7 @@ func entriesEqual(a, b *cacheEntry) bool { return false default: return a.pgName == b.pgName && - a.subnetsKey == b.subnetsKey && + util.IsIPNetsEqual(a.subnets, b.subnets) && a.efResourceVersion == b.efResourceVersion && a.logHash == b.logHash } @@ -619,7 +618,7 @@ func (oc *EFController) addEgressFirewall(egressFirewall *egressfirewallapi.Egre // validateAndGetEgressFirewallDestination validates an egress firewall rule destination and returns // the parsed contents of the destination. 
-func (oc *EFController) validateAndGetEgressFirewallDestination(namespace string, egressFirewallDestination egressfirewallapi.EgressFirewallDestination) ( +func (oc *EFController) validateAndGetEgressFirewallDestination(namespace string, egressFirewallDestination egressfirewallapi.EgressFirewallDestination, entry *cacheEntry) ( cidrSelector string, dnsName string, clusterSubnetIntersection []*net.IPNet, @@ -639,18 +638,13 @@ func (oc *EFController) validateAndGetEgressFirewallDestination(namespace string return "", "", nil, nil, err } cidrSelector = egressFirewallDestination.CIDRSelector - netInfo, err := oc.networkManager.GetActiveNetworkForNamespace(namespace) - if netInfo == nil || err != nil { - if err == nil { - err = fmt.Errorf("no active network found for namespace %s ", namespace) - } - return "", "", nil, nil, - fmt.Errorf("failed to validate egress firewall destination: %w", err) + if entry == nil || entry.subnets == nil { + return "", "", nil, nil, fmt.Errorf("failed to "+ + "validate egress firewall destination: missing cached subnets for namespace %s", namespace) } - subnets := netInfo.Subnets() - for _, clusterSubnet := range subnets { - if clusterSubnet.CIDR.Contains(ipNet.IP) || ipNet.Contains(clusterSubnet.CIDR.IP) { - clusterSubnetIntersection = append(clusterSubnetIntersection, clusterSubnet.CIDR) + for _, clusterSubnet := range entry.subnets { + if clusterSubnet.Contains(ipNet.IP) || ipNet.Contains(clusterSubnet.IP) { + clusterSubnetIntersection = append(clusterSubnetIntersection, clusterSubnet) } } } else { @@ -678,7 +672,7 @@ func (oc *EFController) newEgressFirewallRule(namespace string, rawEgressFirewal // fields of efr. 
var err error efr.to.cidrSelector, efr.to.dnsName, efr.to.clusterSubnetIntersection, efr.to.nodeSelector, err = - oc.validateAndGetEgressFirewallDestination(namespace, rawEgressFirewallRule.To) + oc.validateAndGetEgressFirewallDestination(namespace, rawEgressFirewallRule.To, entry) if err != nil { return efr, err } diff --git a/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall_sync_test.go b/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall_sync_test.go index eb280d5109..5c6ec709c6 100644 --- a/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall_sync_test.go +++ b/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall_sync_test.go @@ -187,5 +187,5 @@ func TestEFControllerSync_UpdatesOnSubnetChangeAndSkipsWhenUnchanged(t *testing. entry, ok := oc.cache.Load(namespace) require.True(t, ok) require.Equal(t, pgName, entry.pgName) - require.Equal(t, subnetsKeyForNetInfo(netInfo2), entry.subnetsKey) + require.True(t, util.IsIPNetsEqual(subnetsForNetInfo(netInfo2), entry.subnets)) } diff --git a/go-controller/pkg/util/net.go b/go-controller/pkg/util/net.go index 6016a946b5..e4628b36d2 100644 --- a/go-controller/pkg/util/net.go +++ b/go-controller/pkg/util/net.go @@ -7,6 +7,7 @@ import ( "fmt" "math/big" "net" + "slices" "strconv" "strings" @@ -329,12 +330,54 @@ func GenerateRandMAC() (net.HardwareAddr, error) { func CopyIPNets(ipnets []*net.IPNet) []*net.IPNet { copy := make([]*net.IPNet, len(ipnets)) for i := range ipnets { - ipnet := *ipnets[i] - copy[i] = &ipnet + if ipnets[i] == nil { + continue + } + copy[i] = &net.IPNet{ + IP: slices.Clone(ipnets[i].IP), + Mask: slices.Clone(ipnets[i].Mask), + } } return copy } +func isIPNetEqual(ipn1, ipn2 *net.IPNet) bool { + if ipn1 == ipn2 { + return true + } + if ipn1 == nil || ipn2 == nil { + return false + } + m1, _ := ipn1.Mask.Size() + m2, _ := ipn2.Mask.Size() + return m1 == m2 && ipn1.IP.Equal(ipn2.IP) +} + +// IsIPNetsEqual returns true if both IPNet slices are equal in length 
and values, regardless of order. +func IsIPNetsEqual(ipn1, ipn2 []*net.IPNet) bool { + if len(ipn1) != len(ipn2) { + return false + } + used := make([]bool, len(ipn2)) + for i := range ipn1 { + found := false + for j := range ipn2 { + if used[j] { + continue + } + if isIPNetEqual(ipn1[i], ipn2[j]) { + used[j] = true + found = true + break + } + } + if !found { + return false + } + } + return true +} + // IPsToNetworkIPs returns the network CIDRs of the provided IP CIDRs func IPsToNetworkIPs(ips ...*net.IPNet) []*net.IPNet { nets := make([]*net.IPNet, len(ips)) From 0e44890df4ddeeff1f70220ff884160ecd482e55 Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Mon, 9 Feb 2026 17:14:44 -0500 Subject: [PATCH 20/59] EgressIP: Fix crash from mutating node informer object Code was modifying the annotations of the informer cache node object. If this was happening while another goroutine was reading the annotation map, it would trigger ovnkube to crash! Fixes: #5950 Signed-off-by: Tim Rozet --- .../pkg/node/controllers/egressip/egressip.go | 24 ++++++++++++++----- .../pkg/node/egressip/gateway_egressip.go | 16 +++++++++---- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/go-controller/pkg/node/controllers/egressip/egressip.go b/go-controller/pkg/node/controllers/egressip/egressip.go index 08726875a3..ebc04f0310 100644 --- a/go-controller/pkg/node/controllers/egressip/egressip.go +++ b/go-controller/pkg/node/controllers/egressip/egressip.go @@ -1142,8 +1142,12 @@ func (c *Controller) migrateFromAddrLabelToAnnotation() error { if err != nil { return err } - node.Annotations[util.OVNNodeSecondaryHostEgressIPs] = string(patch) - return c.kube.UpdateNodeStatus(node) + nodeToUpdate := node.DeepCopy() + if nodeToUpdate.Annotations == nil { + nodeToUpdate.Annotations = map[string]string{} + } + nodeToUpdate.Annotations[util.OVNNodeSecondaryHostEgressIPs] = string(patch) + return c.kube.UpdateNodeStatus(nodeToUpdate) }) } @@ -1174,8 +1178,12 @@ func (c *Controller) 
addIPToAnnotation(ip string) error { if err != nil { return err } - node.Annotations[util.OVNNodeSecondaryHostEgressIPs] = string(patch) - return c.kube.UpdateNodeStatus(node) + nodeToUpdate := node.DeepCopy() + if nodeToUpdate.Annotations == nil { + nodeToUpdate.Annotations = map[string]string{} + } + nodeToUpdate.Annotations[util.OVNNodeSecondaryHostEgressIPs] = string(patch) + return c.kube.UpdateNodeStatus(nodeToUpdate) }) } @@ -1206,8 +1214,12 @@ func (c *Controller) deleteIPFromAnnotation(ip string) error { if err != nil { return err } - node.Annotations[util.OVNNodeSecondaryHostEgressIPs] = string(patch) - return c.kube.UpdateNodeStatus(node) + nodeToUpdate := node.DeepCopy() + if nodeToUpdate.Annotations == nil { + nodeToUpdate.Annotations = map[string]string{} + } + nodeToUpdate.Annotations[util.OVNNodeSecondaryHostEgressIPs] = string(patch) + return c.kube.UpdateNodeStatus(nodeToUpdate) }) } diff --git a/go-controller/pkg/node/egressip/gateway_egressip.go b/go-controller/pkg/node/egressip/gateway_egressip.go index 83657404b8..daf196bf39 100644 --- a/go-controller/pkg/node/egressip/gateway_egressip.go +++ b/go-controller/pkg/node/egressip/gateway_egressip.go @@ -376,8 +376,12 @@ func (g *BridgeEIPAddrManager) addIPToAnnotation(candidateIP net.IP) error { if err != nil { return err } - node.Annotations[util.OVNNodeBridgeEgressIPs] = string(patch) - return g.kube.UpdateNodeStatus(node) + nodeToUpdate := node.DeepCopy() + if nodeToUpdate.Annotations == nil { + nodeToUpdate.Annotations = map[string]string{} + } + nodeToUpdate.Annotations[util.OVNNodeBridgeEgressIPs] = string(patch) + return g.kube.UpdateNodeStatus(nodeToUpdate) }) } @@ -412,8 +416,12 @@ func (g *BridgeEIPAddrManager) deleteIPsFromAnnotation(candidateIPs ...net.IP) e if err != nil { return err } - node.Annotations[util.OVNNodeBridgeEgressIPs] = string(patch) - return g.kube.UpdateNodeStatus(node) + nodeToUpdate := node.DeepCopy() + if nodeToUpdate.Annotations == nil { + nodeToUpdate.Annotations 
= map[string]string{} + } + nodeToUpdate.Annotations[util.OVNNodeBridgeEgressIPs] = string(patch) + return g.kube.UpdateNodeStatus(nodeToUpdate) }) } From 3cfbafff7cdd0e4c4dea0374763e017bde2cbf7e Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Wed, 11 Feb 2026 10:02:49 -0500 Subject: [PATCH 21/59] Fix hybrid overlay mutating informer pod object Signed-off-by: Tim Rozet --- go-controller/hybrid-overlay/pkg/controller/ho_node_linux.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/go-controller/hybrid-overlay/pkg/controller/ho_node_linux.go b/go-controller/hybrid-overlay/pkg/controller/ho_node_linux.go index 1bb5593609..ad9f5e2b50 100644 --- a/go-controller/hybrid-overlay/pkg/controller/ho_node_linux.go +++ b/go-controller/hybrid-overlay/pkg/controller/ho_node_linux.go @@ -89,8 +89,9 @@ func (n *HONodeController) AddPod(pod *corev1.Pod) error { _, ok := pod.Annotations[util.OvnPodAnnotationName] if ok { klog.Infof("Remove the ovnkube pod annotation from pod %s", pod.Name) - delete(pod.Annotations, util.OvnPodAnnotationName) - if err := n.kube.UpdatePodStatus(pod); err != nil { + podToUpdate := pod.DeepCopy() + delete(podToUpdate.Annotations, util.OvnPodAnnotationName) + if err := n.kube.UpdatePodStatus(podToUpdate); err != nil { return fmt.Errorf("failed to remove ovnkube pod annotation from pod %s: %v", pod.Name, err) } return nil From b95fc8081bf4b509944099303c39c4910c41d55a Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Wed, 11 Feb 2026 11:22:05 -0500 Subject: [PATCH 22/59] Fixes gateway egress IP node update logic Gateway egress IP adds IPs to an annotation on the node. The code was assuming the informer object should have the latest data, then overwriting the IPs using that information. That isn't reliable as the informer could have stale data compared to recent kubeclient updates. This would trigger egress IP logic to corrupt the IPs in the node annotation, and cause further drift/corruption in subsequent updates. 
This fixes it by creating a local cache of IPs for the controller, and using that as the source of truth, initialized on start up from the node object. Then updates are driven by what is in the cache, versus what is in the informer. Also fixes places where tests should have been using Eventually. Signed-off-by: Tim Rozet --- .../pkg/node/egressip/gateway_egressip.go | 89 ++++++++----------- .../node/egressip/gateway_egressip_test.go | 70 ++++++++++----- 2 files changed, 84 insertions(+), 75 deletions(-) diff --git a/go-controller/pkg/node/egressip/gateway_egressip.go b/go-controller/pkg/node/egressip/gateway_egressip.go index daf196bf39..cccc79642b 100644 --- a/go-controller/pkg/node/egressip/gateway_egressip.go +++ b/go-controller/pkg/node/egressip/gateway_egressip.go @@ -175,6 +175,7 @@ type BridgeEIPAddrManager struct { nodeName string bridgeName string nodeAnnotationMu sync.Mutex + annotationIPs sets.Set[string] eIPLister egressiplisters.EgressIPLister eIPInformer cache.SharedIndexInformer nodeLister corev1listers.NodeLister @@ -195,6 +196,7 @@ func NewBridgeEIPAddrManager(nodeName, bridgeName string, linkManager *linkmanag nodeName: nodeName, // k8 node name bridgeName: bridgeName, // bridge name for which EIP IPs are managed nodeAnnotationMu: sync.Mutex{}, // mu for updating Node annotation + annotationIPs: sets.New[string](), eIPLister: eIPInformer.Lister(), eIPInformer: eIPInformer.Informer(), nodeLister: nodeInformer.Lister(), @@ -305,6 +307,9 @@ func (g *BridgeEIPAddrManager) SyncEgressIP(objs []interface{}) error { if err != nil { return fmt.Errorf("failed to sync EgressIP gateway config because unable to get Node annotation: %v", err) } + g.nodeAnnotationMu.Lock() + g.annotationIPs = sets.New[string](getIPsStr(annotIPs...)...) 
+ g.nodeAnnotationMu.Unlock() configs := markIPs{v4: map[int]string{}, v6: map[int]string{}} for _, obj := range objs { eip, ok := obj.(*egressipv1.EgressIP) @@ -349,30 +354,15 @@ func (g *BridgeEIPAddrManager) SyncEgressIP(objs []interface{}) error { return nil } -// addIPToAnnotation adds an address to the collection of existing addresses stored in the nodes annotation. Caller -// may repeat addition of addresses without care for duplicate addresses being added. -func (g *BridgeEIPAddrManager) addIPToAnnotation(candidateIP net.IP) error { - g.nodeAnnotationMu.Lock() - defer g.nodeAnnotationMu.Unlock() +// updateAnnotationLocked updates the node's egress IPs +// Must be called with nodeAnnotationMu locked +func (g *BridgeEIPAddrManager) updateAnnotationLocked(updatedIPs sets.Set[string]) error { return retry.RetryOnConflict(retry.DefaultRetry, func() error { node, err := g.nodeLister.Get(g.nodeName) if err != nil { return err } - existingIPsStr, err := util.ParseNodeBridgeEgressIPsAnnotation(node) - if err != nil { - if util.IsAnnotationNotSetError(err) { - existingIPsStr = make([]string, 0) - } else { - return fmt.Errorf("failed to parse annotation key %q from node object: %v", util.OVNNodeBridgeEgressIPs, err) - } - } - existingIPsSet := sets.New[string](existingIPsStr...) - candidateIPStr := candidateIP.String() - if existingIPsSet.Has(candidateIPStr) { - return nil - } - patch, err := json.Marshal(existingIPsSet.Insert(candidateIPStr).UnsortedList()) + patch, err := json.Marshal(updatedIPs.UnsortedList()) if err != nil { return err } @@ -385,44 +375,39 @@ func (g *BridgeEIPAddrManager) addIPToAnnotation(candidateIP net.IP) error { }) } +// addIPToAnnotation adds an address to the collection of existing addresses stored in the nodes annotation. Caller +// may repeat addition of addresses without care for duplicate addresses being added. 
+func (g *BridgeEIPAddrManager) addIPToAnnotation(candidateIP net.IP) error { + g.nodeAnnotationMu.Lock() + defer g.nodeAnnotationMu.Unlock() + updatedIPs := sets.New[string](g.annotationIPs.UnsortedList()...) + updatedIPs.Insert(candidateIP.String()) + if updatedIPs.Equal(g.annotationIPs) { + return nil + } + if err := g.updateAnnotationLocked(updatedIPs); err != nil { + return err + } + g.annotationIPs = updatedIPs + return nil +} + // deleteIPsFromAnnotation deletes address from annotation. If multiple users, callers must synchronise. // deletion of address that doesn't exist will not cause an error. func (g *BridgeEIPAddrManager) deleteIPsFromAnnotation(candidateIPs ...net.IP) error { g.nodeAnnotationMu.Lock() defer g.nodeAnnotationMu.Unlock() - return retry.RetryOnConflict(retry.DefaultRetry, func() error { - node, err := g.nodeLister.Get(g.nodeName) - if err != nil { - return err - } - existingIPsStr, err := util.ParseNodeBridgeEgressIPsAnnotation(node) - if err != nil { - if util.IsAnnotationNotSetError(err) { - existingIPsStr = make([]string, 0) - } else { - return fmt.Errorf("failed to parse annotation key %q from node object: %v", util.OVNNodeBridgeEgressIPs, err) - } - } - if len(existingIPsStr) == 0 { - return nil - } - existingIPsSet := sets.New[string](existingIPsStr...) - candidateIPsStr := getIPsStr(candidateIPs...) - if !existingIPsSet.HasAny(candidateIPsStr...) { - return nil - } - existingIPsSet.Delete(candidateIPsStr...) - patch, err := json.Marshal(existingIPsSet.UnsortedList()) - if err != nil { - return err - } - nodeToUpdate := node.DeepCopy() - if nodeToUpdate.Annotations == nil { - nodeToUpdate.Annotations = map[string]string{} - } - nodeToUpdate.Annotations[util.OVNNodeBridgeEgressIPs] = string(patch) - return g.kube.UpdateNodeStatus(nodeToUpdate) - }) + candidateIPsStr := getIPsStr(candidateIPs...) + updatedIPs := sets.New[string](g.annotationIPs.UnsortedList()...) + updatedIPs.Delete(candidateIPsStr...) 
+ if updatedIPs.Equal(g.annotationIPs) { + return nil + } + if err := g.updateAnnotationLocked(updatedIPs); err != nil { + return err + } + g.annotationIPs = updatedIPs + return nil } func (g *BridgeEIPAddrManager) addIPBridge(ip net.IP) error { diff --git a/go-controller/pkg/node/egressip/gateway_egressip_test.go b/go-controller/pkg/node/egressip/gateway_egressip_test.go index 6493cb968a..816219df0f 100644 --- a/go-controller/pkg/node/egressip/gateway_egressip_test.go +++ b/go-controller/pkg/node/egressip/gateway_egressip_test.go @@ -12,6 +12,7 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/kubernetes/fake" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" @@ -72,9 +73,11 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { isUpdated, err := addrMgr.AddEgressIP(eip) gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "should process a valid EgressIP") gomega.Expect(isUpdated).Should(gomega.BeTrue()) - node, err := addrMgr.nodeLister.Get(nodeName) - gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") - gomega.Expect(parseEIPsFromAnnotation(node)).Should(gomega.ConsistOf(ipV4Addr)) + gomega.Eventually(func() []string { + node, err := addrMgr.nodeLister.Get(nodeName) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") + return parseEIPsFromAnnotation(node) + }).Should(gomega.ConsistOf(ipV4Addr)) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) }) @@ -122,9 +125,11 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { isUpdated, err := addrMgr.AddEgressIP(eip) gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "should process a valid EgressIP") gomega.Expect(isUpdated).Should(gomega.BeTrue()) - node, err := addrMgr.nodeLister.Get(nodeName) - 
gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") - gomega.Expect(parseEIPsFromAnnotation(node)).Should(gomega.ConsistOf(ipV4Addr, ipV4Addr2)) + gomega.Eventually(func() []string { + node, err := addrMgr.nodeLister.Get(nodeName) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") + return parseEIPsFromAnnotation(node) + }).Should(gomega.ConsistOf(ipV4Addr, ipV4Addr2)) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) }) @@ -164,9 +169,11 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { isUpdated, err := addrMgr.UpdateEgressIP(unassignedEIP, assignedEIP) gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "should process a valid EgressIP") gomega.Expect(isUpdated).Should(gomega.BeTrue()) - node, err := addrMgr.nodeLister.Get(nodeName) - gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") - gomega.Expect(parseEIPsFromAnnotation(node)).Should(gomega.ConsistOf(ipV4Addr)) + gomega.Eventually(func() []string { + node, err := addrMgr.nodeLister.Get(nodeName) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") + return parseEIPsFromAnnotation(node) + }).Should(gomega.ConsistOf(ipV4Addr)) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) }) @@ -189,9 +196,11 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { isUpdated, err = addrMgr.UpdateEgressIP(assignedEIP, unassignedEIP) gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "should process a valid EgressIP") gomega.Expect(isUpdated).Should(gomega.BeTrue()) - node, err := addrMgr.nodeLister.Get(nodeName) - gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") - 
gomega.Expect(parseEIPsFromAnnotation(node)).ShouldNot(gomega.ConsistOf(ipV4Addr)) + gomega.Eventually(func() []string { + node, err := addrMgr.nodeLister.Get(nodeName) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") + return parseEIPsFromAnnotation(node) + }).ShouldNot(gomega.ConsistOf(ipV4Addr)) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrDel", nlLinkMock, @@ -250,9 +259,11 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { isUpdated, err = addrMgr.DeleteEgressIP(eip) gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "should process a valid EgressIP") gomega.Expect(isUpdated).Should(gomega.BeTrue()) - node, err := addrMgr.nodeLister.Get(nodeName) - gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") - gomega.Expect(parseEIPsFromAnnotation(node)).ShouldNot(gomega.ConsistOf(ipV4Addr)) + gomega.Eventually(func() []string { + node, err := addrMgr.nodeLister.Get(nodeName) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") + return parseEIPsFromAnnotation(node) + }).ShouldNot(gomega.ConsistOf(ipV4Addr)) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrDel", nlLinkMock, @@ -290,9 +301,11 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { eipUnassigned3 := getEIPNotAssignedToNode(mark3, ipV4Addr3) err := addrMgr.SyncEgressIP([]interface{}{eipAssigned1, eipAssigned2, eipUnassigned3}) gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "should process valid EgressIPs") - node, err := addrMgr.nodeLister.Get(nodeName) - gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be 
present within kapi") - gomega.Expect(parseEIPsFromAnnotation(node)).Should(gomega.ConsistOf(ipV4Addr, ipV4Addr2)) + gomega.Eventually(func() []string { + node, err := addrMgr.nodeLister.Get(nodeName) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") + return parseEIPsFromAnnotation(node) + }).Should(gomega.ConsistOf(ipV4Addr, ipV4Addr2)) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, @@ -374,9 +387,11 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { // Verify cleanup: secondary IP removed from cache, annotation, and bridge gomega.Expect(addrMgr.cache.IsIPPresent(net.ParseIP(secondaryIP))).Should(gomega.BeFalse(), "secondary IP should be removed from cache") - node, err = addrMgr.nodeLister.Get(nodeName) - gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) - gomega.Expect(parseEIPsFromAnnotation(node)).Should(gomega.ConsistOf(ipV4Addr), "only valid OVN IP should be in annotation") + gomega.Eventually(func() []string { + node, err := addrMgr.nodeLister.Get(nodeName) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + return parseEIPsFromAnnotation(node) + }).Should(gomega.ConsistOf(ipV4Addr), "only valid OVN IP should be in annotation") gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrDel", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(secondaryIP), bridgeLinkIndex))).Should(gomega.BeTrue(), "should delete secondary IP from bridge") gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, @@ -411,8 +426,17 @@ func initBridgeEIPAddrManagerWithHostCIDRs(nodeName, bridgeName string, bridgeEI gomega.Expect(watchFactory.Start()).Should(gomega.Succeed(), "watch factory should start") gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "watch factory creation must succeed") linkManager := 
linkmanager.NewController(nodeName, true, true, nil) - return NewBridgeEIPAddrManager(nodeName, bridgeName, linkManager, &kube.Kube{KClient: client}, watchFactory.EgressIPInformer(), watchFactory.NodeCoreInformer()), - watchFactory.Shutdown + addrMgr := NewBridgeEIPAddrManager(nodeName, bridgeName, linkManager, &kube.Kube{KClient: client}, watchFactory.EgressIPInformer(), watchFactory.NodeCoreInformer()) + initialAnnotIPs, err := util.ParseNodeBridgeEgressIPsAnnotation(node) + if err != nil { + if util.IsAnnotationNotSetError(err) { + initialAnnotIPs = make([]string, 0) + } else { + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "bridge EgressIP annotation should be parseable") + } + } + addrMgr.annotationIPs = sets.New[string](initialAnnotIPs...) + return addrMgr, watchFactory.Shutdown } func getEIPAssignedToNode(nodeName, mark, assignedIP string) *egressipv1.EgressIP { From 16480b9eafb83a74ea64c275c1429e4b4002c277 Mon Sep 17 00:00:00 2001 From: fangyuchen86 Date: Sat, 14 Feb 2026 09:10:20 +0800 Subject: [PATCH 23/59] fix issues for adding SAIC Motor to Adopters Signed-off-by: fangyuchen86 --- ADOPTERS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ADOPTERS.md b/ADOPTERS.md index 654a19a650..a513042e95 100644 --- a/ADOPTERS.md +++ b/ADOPTERS.md @@ -5,7 +5,7 @@ 1. Red Hat, Inc. (Uses OVN-Kubernetes as their default CNI in OpenShift product) 2. NVIDIA (Uses OVN-Kubernetes in their production environments) 3. Internet Initiative Japan Inc. (Uses OVN-Kubernetes in their on-premise Kubernetes platform) -4. SAIC Motor Corp. Ltd (Use OVN-Kubernetes as network solution to build multi-tenant private cloud) +4. SAIC Motor Corp. 
Ltd (Uses OVN-Kubernetes as a networking solution to build a multi-tenant private cloud) ## Projects From 32f5e7c5f77449728092b2ede60f637efbdcd5c0 Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Sun, 15 Feb 2026 11:59:17 -0500 Subject: [PATCH 24/59] Fixes Egress IP Tracker UT In Egress IP tracker when GetPrimaryNADForNamespace returns an InvalidPrimaryNetworkError we return nil during the sync, as we expect the NAD controller to deliver the event later when the NAD is processed. However, in this UT there is no full NAD controller and it relies on the lister. Therefore the UT may run before the informer cache is populated and never get notified from the "NAD Controller". To fix it, wait until the informer cache is populated and then simulate the NAD Controller behavior by Reconciling the NAD key. Fixes: #5953 Signed-off-by: Tim Rozet --- .../pkg/networkmanager/egressip_tracker_test.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/go-controller/pkg/networkmanager/egressip_tracker_test.go b/go-controller/pkg/networkmanager/egressip_tracker_test.go index e82d2b9813..68069b5e6a 100644 --- a/go-controller/pkg/networkmanager/egressip_tracker_test.go +++ b/go-controller/pkg/networkmanager/egressip_tracker_test.go @@ -278,6 +278,15 @@ func TestEgressIPTrackerControllerWithInformer(t *testing.T) { }, metav1.CreateOptions{}) g.Expect(err).NotTo(gomega.HaveOccurred()) + // Mirror production ordering: NAD controller notifies registered reconcilers + // after the primary NAD is observed, so namespace reconcile isn't dropped due + // to a transient "primary not found" window in informer caches. 
+ primaryNADKey := util.GetNADName(tt.namespace, "primary") + g.Eventually(func() (string, error) { + return tracker.primaryNADForNamespace(tt.namespace) + }, 2*time.Second, 100*time.Millisecond).Should(gomega.Equal(primaryNADKey)) + tracker.NADReconciler().Reconcile(primaryNADKey) + // Expect add events g.Eventually(func() []callbackEvent { gotMu.Lock() From 7606fd885c17325b5d9f41c8f05da0c68017f484 Mon Sep 17 00:00:00 2001 From: Patryk Diak Date: Wed, 28 Jan 2026 17:44:44 +0100 Subject: [PATCH 25/59] CUDN: cleanup NADs in terminating namespaces without pods Skip namespaces with deletionTimestamp set when selecting target namespaces, triggering NAD deletion for terminating namespaces. Signed-off-by: Patryk Diak --- .../userdefinednetwork/controller.go | 26 ++++++++++---- .../userdefinednetwork/controller_test.go | 26 ++++++++++++++ .../userdefinednetwork/notifier/namespace.go | 3 +- test/e2e/network_segmentation.go | 34 +++++++++++++++++++ 4 files changed, 81 insertions(+), 8 deletions(-) diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/controller.go b/go-controller/pkg/clustermanager/userdefinednetwork/controller.go index f1ffd0dcc3..3bd6ca3086 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/controller.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/controller.go @@ -504,14 +504,14 @@ func (c *Controller) ReconcileNetAttachDef(key string) error { // ReconcileNamespace enqueue relevant Cluster UDN CR requests following namespace events. 
func (c *Controller) ReconcileNamespace(key string) error { namespace, err := c.namespaceInformer.Lister().Get(key) - if err != nil { - // Ignore removed namespaces - if apierrors.IsNotFound(err) { - return nil - } + if err != nil && !apierrors.IsNotFound(err) { return fmt.Errorf("failed to get namespace %q from cache: %w", key, err) } - namespaceLabels := labels.Set(namespace.Labels) + + var namespaceLabels labels.Set + if namespace != nil { + namespaceLabels = namespace.Labels + } c.namespaceTrackerLock.RLock() defer c.namespaceTrackerLock.RUnlock() @@ -519,8 +519,16 @@ func (c *Controller) ReconcileNamespace(key string) error { for cudnName, affectedNamespaces := range c.namespaceTracker { affectedNamespace := affectedNamespaces.Has(key) - selectedNamespace := false + // For deleted namespaces, only reconcile if tracked + if namespace == nil { + if affectedNamespace { + klog.Errorf("BUG: namespace %q was deleted but still tracked by ClusterUDN %q, forcing reconcile to cleanup", key, cudnName) + c.cudnController.Reconcile(cudnName) + } + continue + } + selectedNamespace := false if !affectedNamespace { cudn, err := c.cudnLister.Get(cudnName) if err != nil { @@ -912,6 +920,10 @@ func (c *Controller) getSelectedNamespaces(sel metav1.LabelSelector) (sets.Set[s return nil, fmt.Errorf("failed to list namespaces: %w", err) } for _, selectedNs := range selectedNamespacesList { + if !selectedNs.DeletionTimestamp.IsZero() { + klog.V(5).Infof("Namespace %s is being deleted, skipping", selectedNs.Name) + continue + } selectedNamespaces.Insert(selectedNs.Name) } return selectedNamespaces, nil diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go b/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go index 166931625d..b609859daa 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go @@ -2058,6 +2058,32 @@ var _ = 
Describe("User Defined Network Controller", func() { Expect(err).To(MatchError(expectedErr)) }) + It("when namespace without pods is being deleted, should delete NAD in that namespace", func() { + const cudnName = "test-network" + testNs := testNamespace("blue") + cudn := testClusterUDN(cudnName, testNs.Name) + expectedNAD := testClusterUdnNAD(cudnName, testNs.Name) + c := newTestController(renderNadStub(expectedNAD), cudn, testNs) + Expect(c.Run()).To(Succeed()) + + By("verify NAD is created in namespace") + Eventually(func() error { + _, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudnName, metav1.GetOptions{}) + return err + }).Should(Succeed()) + + By("mark namespace as terminating") + testNs.DeletionTimestamp = &metav1.Time{Time: time.Now()} + _, err := cs.KubeClient.CoreV1().Namespaces().Update(context.Background(), testNs, metav1.UpdateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + By("verify NAD is deleted") + Eventually(func() bool { + _, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudnName, metav1.GetOptions{}) + return apierrors.IsNotFound(err) + }).Should(BeTrue(), "NAD should be deleted when namespace is terminating") + }) + It("when CR is deleted, CR has no finalizer, should succeed", func() { deletedCUDN := testClusterUDN("test", "blue") deletedCUDN.Finalizers = []string{} diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/notifier/namespace.go b/go-controller/pkg/clustermanager/userdefinednetwork/notifier/namespace.go index 90ff81befc..d6dbf634f2 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/notifier/namespace.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/notifier/namespace.go @@ -46,10 +46,11 @@ func NewNamespaceNotifier(nsInformer corev1informer.NamespaceInformer, subscribe func (c *NamespaceNotifier) needUpdate(old, new *corev1.Namespace) bool { 
nsCreated := old == nil && new != nil nsDeleted := old != nil && new == nil + nsDeleting := new != nil && !new.DeletionTimestamp.IsZero() nsLabelsChanged := old != nil && new != nil && !reflect.DeepEqual(old.Labels, new.Labels) - return nsCreated || nsDeleted || nsLabelsChanged + return nsCreated || nsDeleted || nsDeleting || nsLabelsChanged } // reconcile notify subscribers with the request namespace key following namespace events. diff --git a/test/e2e/network_segmentation.go b/test/e2e/network_segmentation.go index 0aefc2236c..2760d5424e 100644 --- a/test/e2e/network_segmentation.go +++ b/test/e2e/network_segmentation.go @@ -1317,6 +1317,40 @@ spec: } }) + It("should delete NAD when target namespace is terminating", func() { + testTerminatingNs := f.Namespace.Name + "terminating" + + By("add new target namespace to CR namespace-selector") + patch := fmt.Sprintf(`[{"op": "add", "path": "./spec/namespaceSelector/matchExpressions/0/values/-", "value": "%s"}]`, testTerminatingNs) + _, err := e2ekubectl.RunKubectl("", "patch", clusterUserDefinedNetworkResource, testClusterUdnName, "--type=json", "-p="+patch) + Expect(err).NotTo(HaveOccurred()) + + By("create the target namespace") + _, err = cs.CoreV1().Namespaces().Create(context.Background(), &v1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: testTerminatingNs, + Labels: map[string]string{RequiredUDNNamespaceLabel: ""}, + }}, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + By("verify NAD is created in the namespace") + Eventually(func() error { + _, err := nadClient.NetworkAttachmentDefinitions(testTerminatingNs).Get(context.Background(), testClusterUdnName, metav1.GetOptions{}) + return err + }, time.Second*15, time.Second*1).Should(Succeed(), "NAD should be created in target namespace") + + By("delete the namespace to trigger termination") + err = cs.CoreV1().Namespaces().Delete(context.Background(), testTerminatingNs, metav1.DeleteOptions{}) + Expect(err).NotTo(HaveOccurred()) + + By("verify 
NAD is deleted from the terminating namespace") + Eventually(func() bool { + _, err := nadClient.NetworkAttachmentDefinitions(testTerminatingNs).Get(context.Background(), testClusterUdnName, metav1.GetOptions{}) + return err != nil && kerrors.IsNotFound(err) + }, time.Second*30, time.Second*1).Should(BeTrue(), + "NAD should be deleted when namespace is terminating") + }) + It("should create NAD in new created namespaces that apply to namespace-selector", func() { testNewNs := f.Namespace.Name + "green" From c2c8e8e55d2db4232ee868340c0b8ed504b802d4 Mon Sep 17 00:00:00 2001 From: Patryk Diak Date: Thu, 12 Feb 2026 17:10:07 +0100 Subject: [PATCH 26/59] Reduce obj_retry.go log spam - Only log "Retry successful" when there were actual retries (failedAttempts > 0), not on every first-attempt success. - Move terminal state pod detection messages to V(5). - Remove duplicate "Update event received" V(5) log that fired before the equality check. There is already a log line for actual updates. Signed-off-by: Patryk Diak --- go-controller/pkg/retry/obj_retry.go | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/go-controller/pkg/retry/obj_retry.go b/go-controller/pkg/retry/obj_retry.go index c9de84d8c2..27e93d001f 100644 --- a/go-controller/pkg/retry/obj_retry.go +++ b/go-controller/pkg/retry/obj_retry.go @@ -415,7 +415,9 @@ func (r *RetryFramework) resourceRetry(objKey string, now time.Time) { } } - klog.Infof("Retry successful for %s %s after %d failed attempt(s)", r.ResourceHandler.ObjType, objKey, entry.failedAttempts) + if entry.failedAttempts > 0 { + klog.Infof("Retry successful for %s %s after %d failed attempt(s)", r.ResourceHandler.ObjType, objKey, entry.failedAttempts) + } if initObj != nil { r.ResourceHandler.RecordSuccessEvent(initObj) } @@ -489,13 +491,13 @@ func (r *RetryFramework) processObjectInTerminalState(obj interface{}, lockedKey _, loaded := r.terminatedObjects.LoadOrStore(lockedKey, true) if loaded { // object was already 
terminated - klog.Infof("Detected object %s of type %s in terminal state (e.g. completed) will be "+ + klog.V(5).Infof("Detected object %s of type %s in terminal state (e.g. completed) will be "+ "ignored as it has already been processed", lockedKey, r.ResourceHandler.ObjType) return } // The object is in a terminal state: delete it from the cluster, delete its retry entry and return. - klog.Infof("Detected object %s of type %s in terminal state (e.g. completed)"+ + klog.V(5).Infof("Detected object %s of type %s in terminal state (e.g. completed)"+ " during %s event: will remove it", lockedKey, r.ResourceHandler.ObjType, event) internalCacheEntry := r.ResourceHandler.GetInternalCacheEntry(obj) retryEntry := r.initRetryObjWithDelete(obj, lockedKey, internalCacheEntry, true) // set up the retry obj for deletion @@ -597,8 +599,6 @@ func (r *RetryFramework) WatchResourceFiltered(namespaceForFilteredHandler strin r.ResourceHandler.ObjType, err) return } - klog.V(5).Infof("Update event received for resource %s, old object is equal to new: %t", - r.ResourceHandler.ObjType, areEqual) if areEqual { return } @@ -650,7 +650,6 @@ func (r *RetryFramework) WatchResourceFiltered(namespaceForFilteredHandler strin } klog.V(5).Infof("Update event received for %s %s", r.ResourceHandler.ObjType, newKey) - r.DoWithLock(newKey, func(key string) { // STEP 1: // Delete existing (old) object if: From 56f2d8d898008187483b2c527926cd73e149074a Mon Sep 17 00:00:00 2001 From: Joel Takvorian Date: Mon, 16 Feb 2026 11:43:04 +0100 Subject: [PATCH 27/59] Update OVN observability documentation - Mention where to find `ovnkube-observ` - Mention upstream netobserv as supporting this feature - Remove old warning about rebuilding the kernel - Fix some markdown issues on lists Signed-off-by: Joel Takvorian --- docs/observability/ovn-observability.md | 33 +++++++++++++++++++------ 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/docs/observability/ovn-observability.md 
b/docs/observability/ovn-observability.md index 5810fea58a..be5113118d 100644 --- a/docs/observability/ovn-observability.md +++ b/docs/observability/ovn-observability.md @@ -7,6 +7,7 @@ specific OVS flows are matched. To see the generated samples, a binary called `o This binary allows printing the samples to stdout or writing them to a file. Currently, supports observability for: + - Network Policy - (Baseline) Admin Network Policy - Egress firewall @@ -37,16 +38,28 @@ insights of what ovn-kubernetes is doing with a packet and why. To enable this feature, use `--observability` flag with `kind.sh` script or `--enable-observability` flag with `ovnkube` binary. -To see the samples, use `ovnkube-observ` binary, use `-h` to see allowed flags. +To see the samples, use `ovnkube-observ` binary, with `-h` to see allowed flags. `ovnkube-observ` is installed on the ovnkube pods. For example: -This feature requires OVS 3.4 and linux kernel 6.11. +``` +kubectl -n ovn-kubernetes exec -it <ovnkube-node-pod> -c ovnkube-controller -- ovnkube-observ -h +Usage of ovnkube-observ: + -add-ovs-collector + Add ovs collector to enable sampling. Use with caution. Make sure no one else is using observability. + -enable-enrichment + Enrich samples with nbdb data. (default true) + -filter-dst-ip string + Filter in only packets to a given destination ip. + -filter-src-ip string + Filter in only packets from a given source ip. + -log-cookie + Print raw sample cookie with psample group_id. + -output-file string + Output file to write the samples to. + -print-full-packet + Print full received packet. When false, only src and dst ips are printed with every sample.
+``` -As of Aug 2024, the kernel need to be built from the source, therefore to try this feature you need to: -- rebuild the kernel with the current master branch from [Linus' tree](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git) - - to rebuild on fedora: https://docs.fedoraproject.org/en-US/quick-docs/kernel-build-custom/#_building_a_vanilla_upstream_kernel -- Build an ovn-kubernetes image that uses the latest OVS/OVN code: -`OVS_BRANCH=main make -C dist/images fedora-dev-local-gw-deployment` -- Start kind with that image, use `-ov localhost/ovn-daemonset-fedora:latest` flag with `kind.sh` script. +This feature requires OVS 3.4 and linux kernel 6.11. ## Workflow Description @@ -59,6 +72,10 @@ OVN-K message: Allowed by default allow from local node policy, direction ingres src=10.129.2.2, dst=10.129.2.5 ``` +## Support in observability tools + +- [NetObserv](https://github.com/netobserv/network-observability-operator): through the `NetworkEvents` agent feature. + ## Implementation Details ### User facing API Changes From 447b8d18235f5cac973c89348bee86576b46e9f9 Mon Sep 17 00:00:00 2001 From: Joel Takvorian Date: Tue, 17 Feb 2026 12:02:57 +0100 Subject: [PATCH 28/59] Mention -add-ovs-collector to start sampling Signed-off-by: Joel Takvorian --- docs/observability/ovn-observability.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/observability/ovn-observability.md b/docs/observability/ovn-observability.md index be5113118d..b79cfa6e39 100644 --- a/docs/observability/ovn-observability.md +++ b/docs/observability/ovn-observability.md @@ -65,8 +65,8 @@ This feature requires OVS 3.4 and linux kernel 6.11. - Observability is enabled by setting the `--enable-observability` flag in the `ovnkube` binary. - For now all mentioned features are enabled by this flag at the same time. -- `ovnkube-observ` binary is used to see the samples. Samples are only generated when the real traffic matching the ACLs -is sent through the OVS. 
An example output is: +- To start observing and display the samples, run `ovnkube-observ -add-ovs-collector`. Samples are only generated when the real traffic matching the ACLs is sent through the OVS. An example output is: + ``` OVN-K message: Allowed by default allow from local node policy, direction ingress src=10.129.2.2, dst=10.129.2.5 From d9888a121225fafc78bd6629baa530c5f9121e3e Mon Sep 17 00:00:00 2001 From: Ayushi Chouhan Date: Wed, 18 Feb 2026 15:48:20 +0530 Subject: [PATCH 29/59] fix live migration link navigation Signed-off-by: Ayushi Chouhan --- mkdocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs.yml b/mkdocs.yml index 1358da3b80..528753cce6 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -82,7 +82,7 @@ nav: - Host To NodePort Hairpin: design/host-to-node-port-hairpin-trafficflow.md - ExternalIPs/LoadBalancerIngress: design/external-ip-and-loadbalancer-ingress.md - Internal Subnets: design/ovn-kubernetes-subnets.md - - Kubevirt VM Live Migration: design/live-migration.md + - Kubevirt VM Live Migration: features/live-migration.md - Getting Started: - Launching OVN-Kubernetes: installation/launching-ovn-kubernetes-on-kind.md - Launching OVN-Kubernetes Using Helm: installation/launching-ovn-kubernetes-with-helm.md From 32cbabcd66662bce02eb60b3e1631f0e6be91156 Mon Sep 17 00:00:00 2001 From: Patryk Diak Date: Thu, 12 Feb 2026 11:57:11 +0100 Subject: [PATCH 30/59] Speed up BGP isolation e2e tests Create all pods non-blocking before waiting for readiness, instead of sequential CreateSync calls. Reduce Consistently timeout for negative connectivity checks from 15s to 5s and curl max-time from 2s to 1s. 
Signed-off-by: Patryk Diak --- test/e2e/route_advertisements.go | 109 ++++++++++++++++++------------- 1 file changed, 62 insertions(+), 47 deletions(-) diff --git a/test/e2e/route_advertisements.go b/test/e2e/route_advertisements.go index c9335c2a95..d52ecb299a 100644 --- a/test/e2e/route_advertisements.go +++ b/test/e2e/route_advertisements.go @@ -966,84 +966,99 @@ var _ = ginkgo.DescribeTableSubtree("BGP: isolation between advertised networks" max := 25999 hostNetworkPort = rand.Intn(max-min+1) + min framework.Logf("Random host networked port chosen: %d", hostNetworkPort) + + ginkgo.By("Setting up pods and services") + + // Create all pod specs upfront as distinct objects. + var hostNetPods []*corev1.Pod for _, node := range nodes.Items { - // this creates a udp / http netexec listener which is able to receive the "hostname" - // command. We use this to validate that each endpoint is received at least once - args := []string{ + p := e2epod.NewAgnhostPod(f.Namespace.Name, node.Name+"-hostnet-ep", nil, nil, nil, "netexec", fmt.Sprintf("--http-port=%d", hostNetworkPort), - fmt.Sprintf("--udp-port=%d", hostNetworkPort), - } - - // create host networked Pods - _, err := createPod(f, node.Name+"-hostnet-ep", node.Name, f.Namespace.Name, []string{}, map[string]string{}, func(p *corev1.Pod) { - p.Spec.Containers[0].Args = args - p.Spec.HostNetwork = true - }) + fmt.Sprintf("--udp-port=%d", hostNetworkPort)) + p.Spec.NodeName = node.Name + p.Spec.HostNetwork = true + hostNetPods = append(hostNetPods, e2epod.NewPodClient(f).Create(context.TODO(), p)) + } - framework.ExpectNoError(err) + podNetASpecs := []*corev1.Pod{ + e2epod.NewAgnhostPod(udnNamespaceA.Name, fmt.Sprintf("pod-1-%s-net-%s", nodes.Items[0].Name, cudnA.Name), nil, nil, []corev1.ContainerPort{{ContainerPort: 8080}}, "netexec"), + e2epod.NewAgnhostPod(udnNamespaceA.Name, fmt.Sprintf("pod-2-%s-net-%s", nodes.Items[0].Name, cudnA.Name), nil, nil, []corev1.ContainerPort{{ContainerPort: 8080}}, "netexec"), + 
e2epod.NewAgnhostPod(udnNamespaceA.Name, fmt.Sprintf("pod-3-%s-net-%s", nodes.Items[1].Name, cudnA.Name), nil, nil, []corev1.ContainerPort{{ContainerPort: 8080}}, "netexec"), } + for _, p := range podNetASpecs { + p.Spec.NodeName = nodes.Items[0].Name + p.Labels = map[string]string{"network": cudnA.Name} + } + podNetASpecs[2].Spec.NodeName = nodes.Items[1].Name - ginkgo.By("Setting up pods and services") - podsNetA = []*corev1.Pod{} - pod := e2epod.NewAgnhostPod(udnNamespaceA.Name, fmt.Sprintf("pod-1-%s-net-%s", nodes.Items[0].Name, cudnA.Name), nil, nil, []corev1.ContainerPort{{ContainerPort: 8080}}, "netexec") - pod.Spec.NodeName = nodes.Items[0].Name - pod.Labels = map[string]string{"network": cudnA.Name} - podsNetA = append(podsNetA, e2epod.NewPodClient(f).CreateSync(context.TODO(), pod)) + podNetBSpec := e2epod.NewAgnhostPod(udnNamespaceB.Name, fmt.Sprintf("pod-1-%s-net-%s", nodes.Items[1].Name, cudnB.Name), nil, nil, []corev1.ContainerPort{{ContainerPort: 8080}}, "netexec") + podNetBSpec.Spec.NodeName = nodes.Items[1].Name + podNetBSpec.Labels = map[string]string{"network": cudnB.Name} + + podNetDefaultSpec := e2epod.NewAgnhostPod("default", fmt.Sprintf("pod-1-%s-net-default", nodes.Items[1].Name), nil, nil, []corev1.ContainerPort{{ContainerPort: 8080}}, "netexec") + podNetDefaultSpec.Spec.NodeName = nodes.Items[1].Name + podNetDefaultSpec.Labels = map[string]string{"network": "default"} - pod.Name = fmt.Sprintf("pod-2-%s-net-%s", nodes.Items[0].Name, cudnA.Name) - podsNetA = append(podsNetA, e2epod.NewPodClient(f).CreateSync(context.TODO(), pod)) + // Submit all pods to the API without waiting for readiness. 
+ podsNetA = []*corev1.Pod{} + for _, p := range podNetASpecs { + podsNetA = append(podsNetA, e2epod.NewPodClient(f).Create(context.TODO(), p)) + } + podNetB = e2epod.PodClientNS(f, udnNamespaceB.Name).Create(context.TODO(), podNetBSpec) + podNetDefault = e2epod.PodClientNS(f, "default").Create(context.TODO(), podNetDefaultSpec) - pod.Name = fmt.Sprintf("pod-3-%s-net-%s", nodes.Items[1].Name, cudnA.Name) - pod.Spec.NodeName = nodes.Items[1].Name - podsNetA = append(podsNetA, e2epod.NewPodClient(f).CreateSync(context.TODO(), pod)) + // Create services (don't need pods to be ready). + familyPolicy := corev1.IPFamilyPolicyPreferDualStack - svc := e2eservice.CreateServiceSpec(fmt.Sprintf("service-%s", cudnA.Name), "", false, pod.Labels) + svc := e2eservice.CreateServiceSpec(fmt.Sprintf("service-%s", cudnA.Name), "", false, map[string]string{"network": cudnA.Name}) svc.Spec.Ports = []corev1.ServicePort{{Port: 8080}} - familyPolicy := corev1.IPFamilyPolicyPreferDualStack svc.Spec.IPFamilyPolicy = &familyPolicy svc.Spec.Type = corev1.ServiceTypeNodePort - svcNodePortNetA, err = f.ClientSet.CoreV1().Services(pod.Namespace).Create(context.Background(), svc, metav1.CreateOptions{}) + svcNodePortNetA, err = f.ClientSet.CoreV1().Services(udnNamespaceA.Name).Create(context.Background(), svc, metav1.CreateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - pod.Name = fmt.Sprintf("pod-1-%s-net-%s", nodes.Items[1].Name, cudnB.Name) - pod.Namespace = udnNamespaceB.Name - pod.Labels = map[string]string{"network": cudnB.Name} - podNetB = e2epod.PodClientNS(f, udnNamespaceB.Name).CreateSync(context.TODO(), pod) - framework.Logf("created pod %s/%s", podNetB.Namespace, podNetB.Name) - svc.Name = fmt.Sprintf("service-%s", cudnB.Name) - svc.Namespace = pod.Namespace - svc.Spec.Selector = pod.Labels - svcNodePortNetB, err = f.ClientSet.CoreV1().Services(pod.Namespace).Create(context.Background(), svc, metav1.CreateOptions{}) + svc.Namespace = udnNamespaceB.Name + svc.Spec.Selector 
= map[string]string{"network": cudnB.Name} + svcNodePortNetB, err = f.ClientSet.CoreV1().Services(udnNamespaceB.Name).Create(context.Background(), svc, metav1.CreateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - pod.Name = fmt.Sprintf("pod-1-%s-net-default", nodes.Items[1].Name) - pod.Namespace = "default" - pod.Labels = map[string]string{"network": "default"} - podNetDefault = e2epod.PodClientNS(f, "default").CreateSync(context.TODO(), pod) - svc.Name = "service-default" svc.Namespace = "default" - svc.Spec.Selector = pod.Labels + svc.Spec.Selector = map[string]string{"network": "default"} svc.Spec.Type = corev1.ServiceTypeNodePort - svcNodePortNetDefault, err = f.ClientSet.CoreV1().Services(pod.Namespace).Create(context.Background(), svc, metav1.CreateOptions{}) + svcNodePortNetDefault, err = f.ClientSet.CoreV1().Services("default").Create(context.Background(), svc, metav1.CreateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) // create one nodePort service with externalTrafficPolicy=Local in default namespace svc.Name = "nodeport-default-etp-local" - svc.Spec.Type = corev1.ServiceTypeNodePort svc.Spec.ExternalTrafficPolicy = corev1.ServiceExternalTrafficPolicyTypeLocal - svcNodePortETPLocalDefault, err = f.ClientSet.CoreV1().Services(svc.Namespace).Create(context.Background(), svc, metav1.CreateOptions{}) + svcNodePortETPLocalDefault, err = f.ClientSet.CoreV1().Services("default").Create(context.Background(), svc, metav1.CreateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) // create one nodePort service with externalTrafficPolicy=Local in udnNamespaceA svc.Name = fmt.Sprintf("nodeport-etp-local-%s", cudnA.Name) svc.Namespace = udnNamespaceA.Name svc.Spec.Selector = map[string]string{"network": cudnA.Name} - svcNodePortETPLocalNetA, err = f.ClientSet.CoreV1().Services(svc.Namespace).Create(context.Background(), svc, metav1.CreateOptions{}) + svcNodePortETPLocalNetA, err = 
f.ClientSet.CoreV1().Services(udnNamespaceA.Name).Create(context.Background(), svc, metav1.CreateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // Wait for all pods to be ready (they've been scheduling in parallel). + for _, p := range append(hostNetPods, append(podsNetA, podNetB, podNetDefault)...) { + framework.ExpectNoError(e2epod.WaitTimeoutForPodReadyInNamespace(context.TODO(), f.ClientSet, p.Name, p.Namespace, framework.PodStartTimeout)) + } + // Re-get pods to have updated status (e.g. pod IPs). + for i, p := range podsNetA { + podsNetA[i], err = f.ClientSet.CoreV1().Pods(p.Namespace).Get(context.TODO(), p.Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + } + podNetB, err = f.ClientSet.CoreV1().Pods(podNetB.Namespace).Get(context.TODO(), podNetB.Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + framework.Logf("created pod %s/%s", podNetB.Namespace, podNetB.Name) + podNetDefault, err = f.ClientSet.CoreV1().Pods(podNetDefault.Namespace).Get(context.TODO(), podNetDefault.Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + ginkgo.By("Expose networks") ra = &rav1.RouteAdvertisements{ ObjectMeta: metav1.ObjectMeta{ @@ -1160,7 +1175,7 @@ var _ = ginkgo.DescribeTableSubtree("BGP: isolation between advertised networks" // to targetAddress. If clientNamespace is empty the function assumes clientName is a node that will be used as the // client. 
var checkConnectivity = func(clientName, clientNamespace, targetAddress string) (string, error) { - curlCmd := []string{"curl", "-g", "-q", "-s", "--max-time", "2", "--insecure", targetAddress} + curlCmd := []string{"curl", "-g", "-q", "-s", "--max-time", "1", "--insecure", targetAddress} var out string var err error if clientNamespace != "" { @@ -1193,7 +1208,7 @@ var _ = ginkgo.DescribeTableSubtree("BGP: isolation between advertised networks" if expectErr { // When the connectivity check is expected to fail it should be failing consistently asyncAssertion = gomega.Consistently - timeout = time.Second * 15 + timeout = time.Second * 5 } asyncAssertion(func() error { out, err := checkConnectivity(clientName, clientNamespace, dst) From 24efc26e1aa68898532c3479224a46c8ee3fe1a4 Mon Sep 17 00:00:00 2001 From: Patryk Diak Date: Thu, 12 Feb 2026 14:12:36 +0100 Subject: [PATCH 31/59] Reuse test setup across BGP isolation table entries The BGP isolation table entries are read-only and can share a single setup/teardown cycle. Wrap the DescribeTable in an Ordered context with BeforeAll/AfterAll instead of BeforeEach/AfterEach. 
Signed-off-by: Patryk Diak --- test/e2e/route_advertisements.go | 1565 +++++++++++++++--------------- 1 file changed, 798 insertions(+), 767 deletions(-) diff --git a/test/e2e/route_advertisements.go b/test/e2e/route_advertisements.go index d52ecb299a..b1cb225e52 100644 --- a/test/e2e/route_advertisements.go +++ b/test/e2e/route_advertisements.go @@ -898,829 +898,860 @@ var _ = ginkgo.DescribeTableSubtree("BGP: isolation between advertised networks" var cudnA, cudnB *udnv1.ClusterUserDefinedNetwork var ra *rav1.RouteAdvertisements var hostNetworkPort int - ginkgo.BeforeEach(func() { - ginkgo.By("Configuring primary UDN namespaces") - var err error - udnNamespaceA, err = f.CreateNamespace(context.TODO(), f.BaseName, map[string]string{ - "e2e-framework": f.BaseName, - RequiredUDNNamespaceLabel: "", - }) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - f.Namespace = udnNamespaceA - udnNamespaceB, err = f.CreateNamespace(context.TODO(), f.BaseName, map[string]string{ - "e2e-framework": f.BaseName, - RequiredUDNNamespaceLabel: "", - }) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ginkgo.Context("", ginkgo.Ordered, ginkgo.ContinueOnFailure, func() { + ginkgo.BeforeAll(func() { + ginkgo.By("Configuring primary UDN namespaces") + var err error + // Create namespaces directly via the API instead of f.CreateNamespace() + // to avoid framework cleaning them up in AfterEach + udnNamespaceA, err = f.ClientSet.CoreV1().Namespaces().Create(context.TODO(), &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: f.BaseName + "-", + Labels: map[string]string{ + "e2e-framework": f.BaseName, + RequiredUDNNamespaceLabel: "", + }, + }, + }, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + f.Namespace = udnNamespaceA + udnNamespaceB, err = f.ClientSet.CoreV1().Namespaces().Create(context.TODO(), &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: f.BaseName + "-", + Labels: map[string]string{ + "e2e-framework": f.BaseName, 
+ RequiredUDNNamespaceLabel: "", + }, + }, + }, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ginkgo.By("Configuring networks") - cudnATemplate.Spec.NamespaceSelector = metav1.LabelSelector{MatchExpressions: []metav1.LabelSelectorRequirement{{ - Key: "kubernetes.io/metadata.name", - Operator: metav1.LabelSelectorOpIn, - Values: []string{udnNamespaceA.Name}, - }}} - cudnBTemplate.Spec.NamespaceSelector = metav1.LabelSelector{MatchExpressions: []metav1.LabelSelectorRequirement{{ - Key: "kubernetes.io/metadata.name", - Operator: metav1.LabelSelectorOpIn, - Values: []string{udnNamespaceB.Name}, - }}} + ginkgo.By("Configuring networks") + cudnATemplate.Spec.NamespaceSelector = metav1.LabelSelector{MatchExpressions: []metav1.LabelSelectorRequirement{{ + Key: "kubernetes.io/metadata.name", + Operator: metav1.LabelSelectorOpIn, + Values: []string{udnNamespaceA.Name}, + }}} + cudnBTemplate.Spec.NamespaceSelector = metav1.LabelSelector{MatchExpressions: []metav1.LabelSelectorRequirement{{ + Key: "kubernetes.io/metadata.name", + Operator: metav1.LabelSelectorOpIn, + Values: []string{udnNamespaceB.Name}, + }}} - // set a common label used to advertise both networks with one RA - cudnATemplate.Labels["advertised-networks-isolation"] = "" - cudnBTemplate.Labels["advertised-networks-isolation"] = "" + // set a common label used to advertise both networks with one RA + cudnATemplate.Labels["advertised-networks-isolation"] = "" + cudnBTemplate.Labels["advertised-networks-isolation"] = "" - udnClient, err := udnclientset.NewForConfig(f.ClientConfig()) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + udnClient, err := udnclientset.NewForConfig(f.ClientConfig()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - if cudnATemplate.Spec.Network.Layer3 != nil { - cudnATemplate.Spec.Network.Layer3.Subnets = filterL3Subnets(f.ClientSet, cudnATemplate.Spec.Network.Layer3.Subnets) - } - if cudnATemplate.Spec.Network.Layer2 != nil { - 
cudnATemplate.Spec.Network.Layer2.Subnets = filterDualStackCIDRs(f.ClientSet, cudnATemplate.Spec.Network.Layer2.Subnets) - } - if cudnBTemplate.Spec.Network.Layer3 != nil { - cudnBTemplate.Spec.Network.Layer3.Subnets = filterL3Subnets(f.ClientSet, cudnBTemplate.Spec.Network.Layer3.Subnets) - } - if cudnBTemplate.Spec.Network.Layer2 != nil { - cudnBTemplate.Spec.Network.Layer2.Subnets = filterDualStackCIDRs(f.ClientSet, cudnBTemplate.Spec.Network.Layer2.Subnets) - } + if cudnATemplate.Spec.Network.Layer3 != nil { + cudnATemplate.Spec.Network.Layer3.Subnets = filterL3Subnets(f.ClientSet, cudnATemplate.Spec.Network.Layer3.Subnets) + } + if cudnATemplate.Spec.Network.Layer2 != nil { + cudnATemplate.Spec.Network.Layer2.Subnets = filterDualStackCIDRs(f.ClientSet, cudnATemplate.Spec.Network.Layer2.Subnets) + } + if cudnBTemplate.Spec.Network.Layer3 != nil { + cudnBTemplate.Spec.Network.Layer3.Subnets = filterL3Subnets(f.ClientSet, cudnBTemplate.Spec.Network.Layer3.Subnets) + } + if cudnBTemplate.Spec.Network.Layer2 != nil { + cudnBTemplate.Spec.Network.Layer2.Subnets = filterDualStackCIDRs(f.ClientSet, cudnBTemplate.Spec.Network.Layer2.Subnets) + } - cudnA, err = udnClient.K8sV1().ClusterUserDefinedNetworks().Create(context.Background(), cudnATemplate, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + cudnA, err = udnClient.K8sV1().ClusterUserDefinedNetworks().Create(context.Background(), cudnATemplate, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - cudnB, err = udnClient.K8sV1().ClusterUserDefinedNetworks().Create(context.Background(), cudnBTemplate, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + cudnB, err = udnClient.K8sV1().ClusterUserDefinedNetworks().Create(context.Background(), cudnBTemplate, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ginkgo.By("Waiting for networks to be ready") - gomega.Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, 
cudnA.Name), 5*time.Second, time.Second).Should(gomega.Succeed()) - gomega.Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, cudnB.Name), 5*time.Second, time.Second).Should(gomega.Succeed()) + ginkgo.By("Waiting for networks to be ready") + gomega.Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, cudnA.Name), 5*time.Second, time.Second).Should(gomega.Succeed()) + gomega.Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, cudnB.Name), 5*time.Second, time.Second).Should(gomega.Succeed()) - ginkgo.By("Selecting 3 schedulable nodes") - nodes, err = e2enode.GetReadySchedulableNodes(context.TODO(), f.ClientSet) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Expect(len(nodes.Items)).To(gomega.BeNumerically(">", 2)) - // create host networked pod - ginkgo.By("Creating host network pods on each node") - // get random port in case the test retries and port is already in use on host node - min := 25000 - max := 25999 - hostNetworkPort = rand.Intn(max-min+1) + min - framework.Logf("Random host networked port chosen: %d", hostNetworkPort) - - ginkgo.By("Setting up pods and services") - - // Create all pod specs upfront as distinct objects. 
- var hostNetPods []*corev1.Pod - for _, node := range nodes.Items { - p := e2epod.NewAgnhostPod(f.Namespace.Name, node.Name+"-hostnet-ep", nil, nil, nil, - "netexec", - fmt.Sprintf("--http-port=%d", hostNetworkPort), - fmt.Sprintf("--udp-port=%d", hostNetworkPort)) - p.Spec.NodeName = node.Name - p.Spec.HostNetwork = true - hostNetPods = append(hostNetPods, e2epod.NewPodClient(f).Create(context.TODO(), p)) - } + ginkgo.By("Selecting 3 schedulable nodes") + nodes, err = e2enode.GetReadySchedulableNodes(context.TODO(), f.ClientSet) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(len(nodes.Items)).To(gomega.BeNumerically(">", 2)) + // create host networked pod + ginkgo.By("Creating host network pods on each node") + // get random port in case the test retries and port is already in use on host node + min := 25000 + max := 25999 + hostNetworkPort = rand.Intn(max-min+1) + min + framework.Logf("Random host networked port chosen: %d", hostNetworkPort) + + ginkgo.By("Setting up pods and services") + + // Create all pod specs upfront as distinct objects. 
+ var hostNetPods []*corev1.Pod + for _, node := range nodes.Items { + p := e2epod.NewAgnhostPod(f.Namespace.Name, node.Name+"-hostnet-ep", nil, nil, nil, + "netexec", + fmt.Sprintf("--http-port=%d", hostNetworkPort), + fmt.Sprintf("--udp-port=%d", hostNetworkPort)) + p.Spec.NodeName = node.Name + p.Spec.HostNetwork = true + hostNetPods = append(hostNetPods, e2epod.NewPodClient(f).Create(context.TODO(), p)) + } - podNetASpecs := []*corev1.Pod{ - e2epod.NewAgnhostPod(udnNamespaceA.Name, fmt.Sprintf("pod-1-%s-net-%s", nodes.Items[0].Name, cudnA.Name), nil, nil, []corev1.ContainerPort{{ContainerPort: 8080}}, "netexec"), - e2epod.NewAgnhostPod(udnNamespaceA.Name, fmt.Sprintf("pod-2-%s-net-%s", nodes.Items[0].Name, cudnA.Name), nil, nil, []corev1.ContainerPort{{ContainerPort: 8080}}, "netexec"), - e2epod.NewAgnhostPod(udnNamespaceA.Name, fmt.Sprintf("pod-3-%s-net-%s", nodes.Items[1].Name, cudnA.Name), nil, nil, []corev1.ContainerPort{{ContainerPort: 8080}}, "netexec"), - } - for _, p := range podNetASpecs { - p.Spec.NodeName = nodes.Items[0].Name - p.Labels = map[string]string{"network": cudnA.Name} - } - podNetASpecs[2].Spec.NodeName = nodes.Items[1].Name + podNetASpecs := []*corev1.Pod{ + e2epod.NewAgnhostPod(udnNamespaceA.Name, fmt.Sprintf("pod-1-%s-net-%s", nodes.Items[0].Name, cudnA.Name), nil, nil, []corev1.ContainerPort{{ContainerPort: 8080}}, "netexec"), + e2epod.NewAgnhostPod(udnNamespaceA.Name, fmt.Sprintf("pod-2-%s-net-%s", nodes.Items[0].Name, cudnA.Name), nil, nil, []corev1.ContainerPort{{ContainerPort: 8080}}, "netexec"), + e2epod.NewAgnhostPod(udnNamespaceA.Name, fmt.Sprintf("pod-3-%s-net-%s", nodes.Items[1].Name, cudnA.Name), nil, nil, []corev1.ContainerPort{{ContainerPort: 8080}}, "netexec"), + } + for _, p := range podNetASpecs { + p.Spec.NodeName = nodes.Items[0].Name + p.Labels = map[string]string{"network": cudnA.Name} + } + podNetASpecs[2].Spec.NodeName = nodes.Items[1].Name - podNetBSpec := e2epod.NewAgnhostPod(udnNamespaceB.Name, 
fmt.Sprintf("pod-1-%s-net-%s", nodes.Items[1].Name, cudnB.Name), nil, nil, []corev1.ContainerPort{{ContainerPort: 8080}}, "netexec") - podNetBSpec.Spec.NodeName = nodes.Items[1].Name - podNetBSpec.Labels = map[string]string{"network": cudnB.Name} + podNetBSpec := e2epod.NewAgnhostPod(udnNamespaceB.Name, fmt.Sprintf("pod-1-%s-net-%s", nodes.Items[1].Name, cudnB.Name), nil, nil, []corev1.ContainerPort{{ContainerPort: 8080}}, "netexec") + podNetBSpec.Spec.NodeName = nodes.Items[1].Name + podNetBSpec.Labels = map[string]string{"network": cudnB.Name} - podNetDefaultSpec := e2epod.NewAgnhostPod("default", fmt.Sprintf("pod-1-%s-net-default", nodes.Items[1].Name), nil, nil, []corev1.ContainerPort{{ContainerPort: 8080}}, "netexec") - podNetDefaultSpec.Spec.NodeName = nodes.Items[1].Name - podNetDefaultSpec.Labels = map[string]string{"network": "default"} + podNetDefaultSpec := e2epod.NewAgnhostPod("default", fmt.Sprintf("pod-1-%s-net-default", nodes.Items[1].Name), nil, nil, []corev1.ContainerPort{{ContainerPort: 8080}}, "netexec") + podNetDefaultSpec.Spec.NodeName = nodes.Items[1].Name + podNetDefaultSpec.Labels = map[string]string{"network": "default"} - // Submit all pods to the API without waiting for readiness. - podsNetA = []*corev1.Pod{} - for _, p := range podNetASpecs { - podsNetA = append(podsNetA, e2epod.NewPodClient(f).Create(context.TODO(), p)) - } - podNetB = e2epod.PodClientNS(f, udnNamespaceB.Name).Create(context.TODO(), podNetBSpec) - podNetDefault = e2epod.PodClientNS(f, "default").Create(context.TODO(), podNetDefaultSpec) + // Submit all pods to the API without waiting for readiness. 
+ podsNetA = []*corev1.Pod{} + for _, p := range podNetASpecs { + podsNetA = append(podsNetA, e2epod.NewPodClient(f).Create(context.TODO(), p)) + } + podNetB = e2epod.PodClientNS(f, udnNamespaceB.Name).Create(context.TODO(), podNetBSpec) + podNetDefault = e2epod.PodClientNS(f, "default").Create(context.TODO(), podNetDefaultSpec) - // Create services (don't need pods to be ready). - familyPolicy := corev1.IPFamilyPolicyPreferDualStack + // Create services (don't need pods to be ready). + familyPolicy := corev1.IPFamilyPolicyPreferDualStack - svc := e2eservice.CreateServiceSpec(fmt.Sprintf("service-%s", cudnA.Name), "", false, map[string]string{"network": cudnA.Name}) - svc.Spec.Ports = []corev1.ServicePort{{Port: 8080}} - svc.Spec.IPFamilyPolicy = &familyPolicy - svc.Spec.Type = corev1.ServiceTypeNodePort - svcNodePortNetA, err = f.ClientSet.CoreV1().Services(udnNamespaceA.Name).Create(context.Background(), svc, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + svc := e2eservice.CreateServiceSpec(fmt.Sprintf("service-%s", cudnA.Name), "", false, map[string]string{"network": cudnA.Name}) + svc.Spec.Ports = []corev1.ServicePort{{Port: 8080}} + svc.Spec.IPFamilyPolicy = &familyPolicy + svc.Spec.Type = corev1.ServiceTypeNodePort + svcNodePortNetA, err = f.ClientSet.CoreV1().Services(udnNamespaceA.Name).Create(context.Background(), svc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - svc.Name = fmt.Sprintf("service-%s", cudnB.Name) - svc.Namespace = udnNamespaceB.Name - svc.Spec.Selector = map[string]string{"network": cudnB.Name} - svcNodePortNetB, err = f.ClientSet.CoreV1().Services(udnNamespaceB.Name).Create(context.Background(), svc, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + svc.Name = fmt.Sprintf("service-%s", cudnB.Name) + svc.Namespace = udnNamespaceB.Name + svc.Spec.Selector = map[string]string{"network": cudnB.Name} + svcNodePortNetB, err = 
f.ClientSet.CoreV1().Services(udnNamespaceB.Name).Create(context.Background(), svc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - svc.Name = "service-default" - svc.Namespace = "default" - svc.Spec.Selector = map[string]string{"network": "default"} - svc.Spec.Type = corev1.ServiceTypeNodePort - svcNodePortNetDefault, err = f.ClientSet.CoreV1().Services("default").Create(context.Background(), svc, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + svc.Name = "service-default" + svc.Namespace = "default" + svc.Spec.Selector = map[string]string{"network": "default"} + svc.Spec.Type = corev1.ServiceTypeNodePort + svcNodePortNetDefault, err = f.ClientSet.CoreV1().Services("default").Create(context.Background(), svc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // create one nodePort service with externalTrafficPolicy=Local in default namespace - svc.Name = "nodeport-default-etp-local" - svc.Spec.ExternalTrafficPolicy = corev1.ServiceExternalTrafficPolicyTypeLocal - svcNodePortETPLocalDefault, err = f.ClientSet.CoreV1().Services("default").Create(context.Background(), svc, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // create one nodePort service with externalTrafficPolicy=Local in default namespace + svc.Name = "nodeport-default-etp-local" + svc.Spec.ExternalTrafficPolicy = corev1.ServiceExternalTrafficPolicyTypeLocal + svcNodePortETPLocalDefault, err = f.ClientSet.CoreV1().Services("default").Create(context.Background(), svc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // create one nodePort service with externalTrafficPolicy=Local in udnNamespaceA - svc.Name = fmt.Sprintf("nodeport-etp-local-%s", cudnA.Name) - svc.Namespace = udnNamespaceA.Name - svc.Spec.Selector = map[string]string{"network": cudnA.Name} - svcNodePortETPLocalNetA, err = f.ClientSet.CoreV1().Services(udnNamespaceA.Name).Create(context.Background(), svc, 
metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // create one nodePort service with externalTrafficPolicy=Local in udnNamespaceA + svc.Name = fmt.Sprintf("nodeport-etp-local-%s", cudnA.Name) + svc.Namespace = udnNamespaceA.Name + svc.Spec.Selector = map[string]string{"network": cudnA.Name} + svcNodePortETPLocalNetA, err = f.ClientSet.CoreV1().Services(udnNamespaceA.Name).Create(context.Background(), svc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // Wait for all pods to be ready (they've been scheduling in parallel). - for _, p := range append(hostNetPods, append(podsNetA, podNetB, podNetDefault)...) { - framework.ExpectNoError(e2epod.WaitTimeoutForPodReadyInNamespace(context.TODO(), f.ClientSet, p.Name, p.Namespace, framework.PodStartTimeout)) - } - // Re-get pods to have updated status (e.g. pod IPs). - for i, p := range podsNetA { - podsNetA[i], err = f.ClientSet.CoreV1().Pods(p.Namespace).Get(context.TODO(), p.Name, metav1.GetOptions{}) + // Wait for all pods to be ready (they've been scheduling in parallel). + for _, p := range append(hostNetPods, append(podsNetA, podNetB, podNetDefault)...) { + framework.ExpectNoError(e2epod.WaitTimeoutForPodReadyInNamespace(context.TODO(), f.ClientSet, p.Name, p.Namespace, framework.PodStartTimeout)) + } + // Re-get pods to have updated status (e.g. pod IPs). 
+ for i, p := range podsNetA { + podsNetA[i], err = f.ClientSet.CoreV1().Pods(p.Namespace).Get(context.TODO(), p.Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + } + podNetB, err = f.ClientSet.CoreV1().Pods(podNetB.Namespace).Get(context.TODO(), podNetB.Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + framework.Logf("created pod %s/%s", podNetB.Namespace, podNetB.Name) + podNetDefault, err = f.ClientSet.CoreV1().Pods(podNetDefault.Namespace).Get(context.TODO(), podNetDefault.Name, metav1.GetOptions{}) framework.ExpectNoError(err) - } - podNetB, err = f.ClientSet.CoreV1().Pods(podNetB.Namespace).Get(context.TODO(), podNetB.Name, metav1.GetOptions{}) - framework.ExpectNoError(err) - framework.Logf("created pod %s/%s", podNetB.Namespace, podNetB.Name) - podNetDefault, err = f.ClientSet.CoreV1().Pods(podNetDefault.Namespace).Get(context.TODO(), podNetDefault.Name, metav1.GetOptions{}) - framework.ExpectNoError(err) - ginkgo.By("Expose networks") - ra = &rav1.RouteAdvertisements{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: "advertised-networks-isolation-ra", - }, - Spec: rav1.RouteAdvertisementsSpec{ - NetworkSelectors: apitypes.NetworkSelectors{ - apitypes.NetworkSelector{ - NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, - ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ - NetworkSelector: metav1.LabelSelector{ - MatchLabels: map[string]string{"advertised-networks-isolation": ""}, + ginkgo.By("Expose networks") + ra = &rav1.RouteAdvertisements{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "advertised-networks-isolation-ra", + }, + Spec: rav1.RouteAdvertisementsSpec{ + NetworkSelectors: apitypes.NetworkSelectors{ + apitypes.NetworkSelector{ + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"advertised-networks-isolation": ""}, + }, 
}, }, }, + NodeSelector: metav1.LabelSelector{}, + FRRConfigurationSelector: metav1.LabelSelector{}, + Advertisements: []rav1.AdvertisementType{ + rav1.PodNetwork, + }, }, - NodeSelector: metav1.LabelSelector{}, - FRRConfigurationSelector: metav1.LabelSelector{}, - Advertisements: []rav1.AdvertisementType{ - rav1.PodNetwork, - }, - }, - } - - raClient, err := raclientset.NewForConfig(f.ClientConfig()) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + } - ra, err = raClient.K8sV1().RouteAdvertisements().Create(context.TODO(), ra, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + raClient, err := raclientset.NewForConfig(f.ClientConfig()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ginkgo.By("ensure route advertisement matching both networks was created successfully") - gomega.Eventually(func() string { - ra, err := raClient.K8sV1().RouteAdvertisements().Get(context.TODO(), ra.Name, metav1.GetOptions{}) - if err != nil { - return "" - } - condition := meta.FindStatusCondition(ra.Status.Conditions, "Accepted") - if condition == nil { - return "" - } - return condition.Reason - }, 30*time.Second, time.Second).Should(gomega.Equal("Accepted")) + ra, err = raClient.K8sV1().RouteAdvertisements().Create(context.TODO(), ra, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ginkgo.By("ensure routes from UDNs are learned by the external FRR router") - serverContainerIPs := getBGPServerContainerIPs(f) - for _, serverContainerIP := range serverContainerIPs { - for _, node := range nodes.Items { - if cudnA.Spec.Network.Topology == udnv1.NetworkTopologyLayer3 { - checkL3NodePodRoute(node, serverContainerIP, routerContainerName, types.CUDNPrefix+cudnATemplate.Name) - checkL3NodePodRoute(node, serverContainerIP, routerContainerName, types.CUDNPrefix+cudnBTemplate.Name) - } else { - checkL2NodePodRoute(node, serverContainerIP, routerContainerName, cudnATemplate.Spec.Network.Layer2.Subnets) - checkL2NodePodRoute(node, 
serverContainerIP, routerContainerName, cudnBTemplate.Spec.Network.Layer2.Subnets) + ginkgo.By("ensure route advertisement matching both networks was created successfully") + gomega.Eventually(func() string { + ra, err := raClient.K8sV1().RouteAdvertisements().Get(context.TODO(), ra.Name, metav1.GetOptions{}) + if err != nil { + return "" + } + condition := meta.FindStatusCondition(ra.Status.Conditions, "Accepted") + if condition == nil { + return "" + } + return condition.Reason + }, 30*time.Second, time.Second).Should(gomega.Equal("Accepted")) + + ginkgo.By("ensure routes from UDNs are learned by the external FRR router") + serverContainerIPs := getBGPServerContainerIPs(f) + for _, serverContainerIP := range serverContainerIPs { + for _, node := range nodes.Items { + if cudnA.Spec.Network.Topology == udnv1.NetworkTopologyLayer3 { + checkL3NodePodRoute(node, serverContainerIP, routerContainerName, types.CUDNPrefix+cudnATemplate.Name) + checkL3NodePodRoute(node, serverContainerIP, routerContainerName, types.CUDNPrefix+cudnBTemplate.Name) + } else { + checkL2NodePodRoute(node, serverContainerIP, routerContainerName, cudnATemplate.Spec.Network.Layer2.Subnets) + checkL2NodePodRoute(node, serverContainerIP, routerContainerName, cudnBTemplate.Spec.Network.Layer2.Subnets) + } } } - } - }) - - ginkgo.AfterEach(func() { - gomega.Expect(f.ClientSet.CoreV1().Pods(udnNamespaceA.Name).DeleteCollection(context.Background(), metav1.DeleteOptions{}, metav1.ListOptions{})).To(gomega.Succeed()) - gomega.Expect(f.ClientSet.CoreV1().Pods(udnNamespaceB.Name).DeleteCollection(context.Background(), metav1.DeleteOptions{}, metav1.ListOptions{})).To(gomega.Succeed()) + }) - udnClient, err := udnclientset.NewForConfig(f.ClientConfig()) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - if cudnB != nil { - err = udnClient.K8sV1().ClusterUserDefinedNetworks().Delete(context.TODO(), cudnB.Name, metav1.DeleteOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - 
gomega.Eventually(func() bool { - _, err := udnClient.K8sV1().ClusterUserDefinedNetworks().Get(context.TODO(), cudnB.Name, metav1.GetOptions{}) - return apierrors.IsNotFound(err) - }, time.Second*60).Should(gomega.BeTrue()) - cudnB = nil - } - if cudnA != nil { - err = udnClient.K8sV1().ClusterUserDefinedNetworks().Delete(context.TODO(), cudnA.Name, metav1.DeleteOptions{}) + ginkgo.AfterAll(func() { + if udnNamespaceA != nil { + gomega.Expect(f.ClientSet.CoreV1().Pods(udnNamespaceA.Name).DeleteCollection(context.Background(), metav1.DeleteOptions{}, metav1.ListOptions{})).To(gomega.Succeed()) + } + if udnNamespaceB != nil { + gomega.Expect(f.ClientSet.CoreV1().Pods(udnNamespaceB.Name).DeleteCollection(context.Background(), metav1.DeleteOptions{}, metav1.ListOptions{})).To(gomega.Succeed()) + } + udnClient, err := udnclientset.NewForConfig(f.ClientConfig()) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(func() bool { - _, err := udnClient.K8sV1().ClusterUserDefinedNetworks().Get(context.TODO(), cudnA.Name, metav1.GetOptions{}) - return apierrors.IsNotFound(err) - }, time.Second*60).Should(gomega.BeTrue()) - cudnA = nil - } + if cudnB != nil { + err = udnClient.K8sV1().ClusterUserDefinedNetworks().Delete(context.TODO(), cudnB.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(func() bool { + _, err := udnClient.K8sV1().ClusterUserDefinedNetworks().Get(context.TODO(), cudnB.Name, metav1.GetOptions{}) + return apierrors.IsNotFound(err) + }, time.Second*60).Should(gomega.BeTrue()) + cudnB = nil + } + if cudnA != nil { + err = udnClient.K8sV1().ClusterUserDefinedNetworks().Delete(context.TODO(), cudnA.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(func() bool { + _, err := udnClient.K8sV1().ClusterUserDefinedNetworks().Get(context.TODO(), cudnA.Name, metav1.GetOptions{}) + return apierrors.IsNotFound(err) + }, time.Second*60).Should(gomega.BeTrue()) 
+ cudnA = nil + } - if podNetDefault != nil { - err = f.ClientSet.CoreV1().Pods(podNetDefault.Namespace).Delete(context.Background(), podNetDefault.Name, metav1.DeleteOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - podNetDefault = nil - } + if podNetDefault != nil { + err = f.ClientSet.CoreV1().Pods(podNetDefault.Namespace).Delete(context.Background(), podNetDefault.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + podNetDefault = nil + } - if svcNodePortNetDefault != nil { - err = f.ClientSet.CoreV1().Services(svcNodePortNetDefault.Namespace).Delete(context.Background(), svcNodePortNetDefault.Name, metav1.DeleteOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - svcNodePortNetDefault = nil - } - if svcNodePortETPLocalDefault != nil { - err = f.ClientSet.CoreV1().Services(svcNodePortETPLocalDefault.Namespace).Delete(context.Background(), svcNodePortETPLocalDefault.Name, metav1.DeleteOptions{}) + if svcNodePortNetDefault != nil { + err = f.ClientSet.CoreV1().Services(svcNodePortNetDefault.Namespace).Delete(context.Background(), svcNodePortNetDefault.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + svcNodePortNetDefault = nil + } + if svcNodePortETPLocalDefault != nil { + err = f.ClientSet.CoreV1().Services(svcNodePortETPLocalDefault.Namespace).Delete(context.Background(), svcNodePortETPLocalDefault.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + svcNodePortETPLocalDefault = nil + } + + raClient, err := raclientset.NewForConfig(f.ClientConfig()) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - svcNodePortETPLocalDefault = nil - } - raClient, err := raclientset.NewForConfig(f.ClientConfig()) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + if ra != nil { + err = raClient.K8sV1().RouteAdvertisements().Delete(context.TODO(), ra.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ra = nil + } - if ra != nil { - err 
= raClient.K8sV1().RouteAdvertisements().Delete(context.TODO(), ra.Name, metav1.DeleteOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ra = nil - } - }) + // Delete the namespaces manually since they were created directly + // via the API (not via f.CreateNamespace) to avoid framework's + // AfterEach cleanup. + if udnNamespaceA != nil { + err = f.ClientSet.CoreV1().Namespaces().Delete(context.Background(), udnNamespaceA.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + udnNamespaceA = nil + } + if udnNamespaceB != nil { + err = f.ClientSet.CoreV1().Namespaces().Delete(context.Background(), udnNamespaceB.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + udnNamespaceB = nil + } + }) - ginkgo.DescribeTable("connectivity between networks", - func(connInfo func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool)) { - // checkConnectivity performs a curl command from a specified client (pod or node) - // to targetAddress. If clientNamespace is empty the function assumes clientName is a node that will be used as the - // client. 
- var checkConnectivity = func(clientName, clientNamespace, targetAddress string) (string, error) { - curlCmd := []string{"curl", "-g", "-q", "-s", "--max-time", "1", "--insecure", targetAddress} - var out string - var err error - if clientNamespace != "" { - framework.Logf("Attempting connectivity from pod: %s/%s -> %s", clientNamespace, clientName, targetAddress) - stdout, stderr, err := e2epodoutput.RunHostCmdWithFullOutput(clientNamespace, clientName, strings.Join(curlCmd, " ")) - out = stdout + "\n" + stderr - if err != nil { - return out, fmt.Errorf("connectivity check failed from Pod %s/%s to %s: %w", clientNamespace, clientName, targetAddress, err) - } - } else { - framework.Logf("Attempting connectivity from node: %s -> %s", clientName, targetAddress) - out, err = infraprovider.Get().ExecK8NodeCommand(clientName, curlCmd) - if err != nil { - // out is empty on error and error contains out... - return err.Error(), fmt.Errorf("connectivity check failed from node %s to %s: %w", clientName, targetAddress, err) + ginkgo.DescribeTable("connectivity between networks", + func(connInfo func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool)) { + // checkConnectivity performs a curl command from a specified client (pod or node) + // to targetAddress. If clientNamespace is empty the function assumes clientName is a node that will be used as the + // client. 
+ var checkConnectivity = func(clientName, clientNamespace, targetAddress string) (string, error) { + curlCmd := []string{"curl", "-g", "-q", "-s", "--max-time", "1", "--insecure", targetAddress} + var out string + var err error + if clientNamespace != "" { + framework.Logf("Attempting connectivity from pod: %s/%s -> %s", clientNamespace, clientName, targetAddress) + stdout, stderr, err := e2epodoutput.RunHostCmdWithFullOutput(clientNamespace, clientName, strings.Join(curlCmd, " ")) + out = stdout + "\n" + stderr + if err != nil { + return out, fmt.Errorf("connectivity check failed from Pod %s/%s to %s: %w", clientNamespace, clientName, targetAddress, err) + } + } else { + framework.Logf("Attempting connectivity from node: %s -> %s", clientName, targetAddress) + out, err = infraprovider.Get().ExecK8NodeCommand(clientName, curlCmd) + if err != nil { + // out is empty on error and error contains out... + return err.Error(), fmt.Errorf("connectivity check failed from node %s to %s: %w", clientName, targetAddress, err) + } } - } - client := clientName - if clientNamespace != "" { - client = clientNamespace + "/" + client - } - framework.Logf("Connectivity check successful:'%s' -> %s", client, targetAddress) - return out, nil - } - for _, ipFamily := range getSupportedIPFamiliesSlice(f.ClientSet) { - clientName, clientNamespace, dst, expectedOutput, expectErr := connInfo(ipFamily) - asyncAssertion := gomega.Eventually - timeout := time.Second * 30 - if expectErr { - // When the connectivity check is expected to fail it should be failing consistently - asyncAssertion = gomega.Consistently - timeout = time.Second * 5 + client := clientName + if clientNamespace != "" { + client = clientNamespace + "/" + client + } + framework.Logf("Connectivity check successful:'%s' -> %s", client, targetAddress) + return out, nil } - asyncAssertion(func() error { - out, err := checkConnectivity(clientName, clientNamespace, dst) - if expectErr != (err != nil) { - return 
fmt.Errorf("expected connectivity check to return error(%t), got %v, output %v", expectErr, err, out) + for _, ipFamily := range getSupportedIPFamiliesSlice(f.ClientSet) { + clientName, clientNamespace, dst, expectedOutput, expectErr := connInfo(ipFamily) + asyncAssertion := gomega.Eventually + timeout := time.Second * 30 + if expectErr { + // When the connectivity check is expected to fail it should be failing consistently + asyncAssertion = gomega.Consistently + timeout = time.Second * 5 } - if expectedOutput != "" { - if !strings.Contains(out, expectedOutput) { - return fmt.Errorf("expected connectivity check to contain %q, got %q", expectedOutput, out) + asyncAssertion(func() error { + out, err := checkConnectivity(clientName, clientNamespace, dst) + if expectErr != (err != nil) { + return fmt.Errorf("expected connectivity check to return error(%t), got %v, output %v", expectErr, err, out) } - } - return nil - }, timeout).Should(gomega.BeNil()) - } - }, - ginkgo.Entry("pod to pod on the same network and same node should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // podsNetA[0] and podsNetA[1] are on the same node - clientPod := podsNetA[0] - srvPod := podsNetA[1] + if expectedOutput != "" { + if !strings.Contains(out, expectedOutput) { + return fmt.Errorf("expected connectivity check to contain %q, got %q", expectedOutput, out) + } + } + return nil + }, timeout).Should(gomega.BeNil()) + } + }, + ginkgo.Entry("pod to pod on the same network and same node should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // podsNetA[0] and podsNetA[1] are on the same node + clientPod := podsNetA[0] + srvPod := podsNetA[1] - clientPodStatus, err := getPodAnnotationForAttachment(clientPod, namespacedName(clientPod.Namespace, cudnATemplate.Name)) - framework.ExpectNoError(err) - srvPodStatus, 
err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnATemplate.Name)) - framework.ExpectNoError(err) - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", - getFirstCIDROfFamily(ipFamily, clientPodStatus.IPs).IP.String(), false - }), - ginkgo.Entry("pod to pod on the same network and different nodes should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // podsNetA[0] and podsNetA[2] are on different nodes - clientPod := podsNetA[0] - srvPod := podsNetA[2] - - clientPodStatus, err := getPodAnnotationForAttachment(clientPod, namespacedName(clientPod.Namespace, cudnATemplate.Name)) - framework.ExpectNoError(err) - srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnATemplate.Name)) - framework.ExpectNoError(err) - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", - getFirstCIDROfFamily(ipFamily, clientPodStatus.IPs).IP.String(), false - }), - ginkgo.Entry("pod to pod connectivity on different networks and same node", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // podsNetA[2] and podNetB are on the same node - clientPod := podsNetA[2] - srvPod := podNetB - - srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnBTemplate.Name)) - framework.ExpectNoError(err) - var ( - curlOutput string - curlErr bool - ) - // Test behavior depends on the ADVERTISED_UDN_ISOLATION_MODE environment variable: - // - "loose": Pod connectivity is allowed, test expects success - // - anything else (including unset): Treated as "strict", pod connectivity is blocked - if os.Getenv("ADVERTISED_UDN_ISOLATION_MODE") == 
"loose" { clientPodStatus, err := getPodAnnotationForAttachment(clientPod, namespacedName(clientPod.Namespace, cudnATemplate.Name)) framework.ExpectNoError(err) + srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnATemplate.Name)) + framework.ExpectNoError(err) + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", + getFirstCIDROfFamily(ipFamily, clientPodStatus.IPs).IP.String(), false + }), + ginkgo.Entry("pod to pod on the same network and different nodes should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // podsNetA[0] and podsNetA[2] are on different nodes + clientPod := podsNetA[0] + srvPod := podsNetA[2] - // With the above underlay routing configuration client pod can reach server pod. - curlOutput = getFirstCIDROfFamily(ipFamily, clientPodStatus.IPs).IP.String() - curlErr = false - } else { - curlOutput = curlConnectionTimeoutCode - curlErr = true - } - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", - curlOutput, curlErr - }), - - ginkgo.Entry("pod to pod connectivity on different networks and different nodes", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // podsNetA[0] and podNetB are on different nodes - clientPod := podsNetA[0] - srvPod := podNetB - - srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnBTemplate.Name)) - framework.ExpectNoError(err) - var ( - curlOutput string - curlErr bool - ) - if os.Getenv("ADVERTISED_UDN_ISOLATION_MODE") == "loose" { clientPodStatus, err := getPodAnnotationForAttachment(clientPod, namespacedName(clientPod.Namespace, cudnATemplate.Name)) 
framework.ExpectNoError(err) + srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnATemplate.Name)) + framework.ExpectNoError(err) + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", + getFirstCIDROfFamily(ipFamily, clientPodStatus.IPs).IP.String(), false + }), + ginkgo.Entry("pod to pod connectivity on different networks and same node", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // podsNetA[2] and podNetB are on the same node + clientPod := podsNetA[2] + srvPod := podNetB + + srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnBTemplate.Name)) + framework.ExpectNoError(err) + var ( + curlOutput string + curlErr bool + ) + // Test behavior depends on the ADVERTISED_UDN_ISOLATION_MODE environment variable: + // - "loose": Pod connectivity is allowed, test expects success + // - anything else (including unset): Treated as "strict", pod connectivity is blocked + if os.Getenv("ADVERTISED_UDN_ISOLATION_MODE") == "loose" { + clientPodStatus, err := getPodAnnotationForAttachment(clientPod, namespacedName(clientPod.Namespace, cudnATemplate.Name)) + framework.ExpectNoError(err) + + // With the above underlay routing configuration client pod can reach server pod. 
+ curlOutput = getFirstCIDROfFamily(ipFamily, clientPodStatus.IPs).IP.String() + curlErr = false + } else { + curlOutput = curlConnectionTimeoutCode + curlErr = true + } + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", + curlOutput, curlErr + }), - curlOutput = getFirstCIDROfFamily(ipFamily, clientPodStatus.IPs).IP.String() - curlErr = false - } else { - curlOutput = curlConnectionTimeoutCode - curlErr = true - } - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", - curlOutput, curlErr - }), - ginkgo.Entry("pod in the default network should not be able to access an advertised UDN pod on the same node", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // podNetDefault and podNetB are on the same node - clientPod := podNetDefault - srvPod := podNetB - - srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnBTemplate.Name)) - framework.ExpectNoError(err) - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", - curlConnectionTimeoutCode, true - }), - ginkgo.Entry("pod in the default network should not be able to access an advertised UDN pod on a different node", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // podNetDefault and podsNetA[0] are on different nodes - clientPod := podNetDefault - srvPod := podsNetA[0] - - srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnATemplate.Name)) - framework.ExpectNoError(err) - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, 
srvPodStatus.IPs).IP.String(), "8080") + "/clientip", - curlConnectionTimeoutCode, true - }), - ginkgo.Entry("pod in the default network should not be able to access a UDN service", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - return podNetDefault.Name, podNetDefault.Namespace, net.JoinHostPort(getFirstIPStringOfFamily(ipFamily, svcNodePortNetA.Spec.ClusterIPs), "8080") + "/clientip", - curlConnectionTimeoutCode, true - }), - ginkgo.Entry("pod in the UDN should be able to access a service in the same network", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - return podsNetA[0].Name, podsNetA[0].Namespace, net.JoinHostPort(getFirstIPStringOfFamily(ipFamily, svcNodePortNetA.Spec.ClusterIPs), "8080") + "/clientip", "", false - }), - ginkgo.Entry("pod in the UDN should not be able to access a default network service", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - err := true - out := curlConnectionTimeoutCode - if cudnATemplate.Spec.Network.Topology == udnv1.NetworkTopologyLayer2 { - // FIXME: prevent looping of traffic in L2 UDNs - // bad behaviour: packet is looping from management port -> breth0 -> GR -> management port -> breth0 and so on - // which is a never ending loop - // this causes curl timeout with code 7 host unreachable instead of code 28 - out = "" - } - return podsNetA[0].Name, podsNetA[0].Namespace, net.JoinHostPort(getFirstIPStringOfFamily(ipFamily, svcNodePortNetDefault.Spec.ClusterIPs), "8080") + "/clientip", out, err - }), - ginkgo.Entry("pod in the UDN should be able to access kapi in default network service", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - return podsNetA[0].Name, podsNetA[0].Namespace, 
"https://kubernetes.default/healthz", "", false - }), - ginkgo.Entry("pod in the UDN should be able to access kapi service cluster IP directly", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // Get kubernetes service from default namespace - kubernetesService, err := f.ClientSet.CoreV1().Services("default").Get(context.TODO(), "kubernetes", metav1.GetOptions{}) - framework.ExpectNoError(err, "should be able to get kubernetes service") - - // NOTE: See https://github.com/kubernetes/enhancements/tree/master/keps/sig-network/2438-dual-stack-apiserver - // Today the kubernetes.default service is single-stack and cannot be dual-stack. - if isDualStackCluster(nodes) && ipFamily == utilnet.IPv6 { - e2eskipper.Skipf("Dual stack kubernetes.default service is not supported in kubernetes") - } - // Get the cluster IP for the specified IP family - clusterIP := getFirstIPStringOfFamily(ipFamily, kubernetesService.Spec.ClusterIPs) - gomega.Expect(clusterIP).NotTo(gomega.BeEmpty(), fmt.Sprintf("no cluster IP available for IP family %v", ipFamily)) - - // Access the kubernetes API at the cluster IP directly on port 443 - return podsNetA[0].Name, podsNetA[0].Namespace, fmt.Sprintf("https://%s/healthz", net.JoinHostPort(clusterIP, "443")), "", false - }), - ginkgo.Entry("pod in the UDN should not be able to access a service in a different UDN", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - return podsNetA[0].Name, podsNetA[0].Namespace, net.JoinHostPort(getFirstIPStringOfFamily(ipFamily, svcNodePortNetB.Spec.ClusterIPs), "8080") + "/clientip", - curlConnectionTimeoutCode, true - }), - ginkgo.Entry("host to a local UDN pod should not work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientNode := podsNetA[0].Spec.NodeName - 
srvPod := podsNetA[0] - - srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnATemplate.Name)) - framework.ExpectNoError(err) - return clientNode, "", net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", - curlConnectionTimeoutCode, true - }), - ginkgo.Entry("host to a different node UDN pod should not work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // podsNetA[0] and podsNetA[2] are on different nodes - clientNode := podsNetA[2].Spec.NodeName - srvPod := podsNetA[0] - - srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnATemplate.Name)) - framework.ExpectNoError(err) - return clientNode, "", net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", - curlConnectionTimeoutCode, true - }), - ginkgo.Entry("UDN pod to local node should not work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podsNetA[0] - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - // FIXME: add the host process socket to the VRF for this test to work. - // This scenario is something that is not supported yet. So the test will continue to fail. - // This works the same on both normal UDNs and advertised UDNs. - // So because the process is not bound to the VRF, packet reaches the host but kernel sends a RESET. So its not code 28 but code7. 
- // 10:59:55.351067 319594f193d4d_3 P ifindex 191 0a:58:5d:5d:01:05 ethertype IPv4 (0x0800), length 80: (tos 0x0, ttl 64, id 57264, - // offset 0, flags [DF], proto TCP (6), length 60) - // 93.93.1.5.36363 > 172.18.0.2.25022: Flags [S], cksum 0x0aa5 (incorrect -> 0xe0b7), seq 3879759281, win 65280, - // options [mss 1360,sackOK,TS val 3006752321 ecr 0,nop,wscale 7], length 0 - // 10:59:55.352404 ovn-k8s-mp87 In ifindex 186 0a:58:5d:5d:01:01 ethertype IPv4 (0x0800), length 80: (tos 0x0, ttl 63, id 57264, - // offset 0, flags [DF], proto TCP (6), length 60) - // 169.154.169.12.36363 > 172.18.0.2.25022: Flags [S], cksum 0xe0b7 (correct), seq 3879759281, win 65280, - // options [mss 1360,sackOK,TS val 3006752321 ecr 0,nop,wscale 7], length 0 - // 10:59:55.352461 ovn-k8s-mp87 Out ifindex 186 0a:58:5d:5d:01:02 ethertype IPv4 (0x0800), length 60: (tos 0x0, ttl 64, id 0, - // offset 0, flags [DF], proto TCP (6), length 40) - // 172.18.0.2.25022 > 169.154.169.12.36363: Flags [R.], cksum 0x609d (correct), seq 0, ack 3879759282, win 0, length 0 - // 10:59:55.352927 319594f193d4d_3 Out ifindex 191 0a:58:5d:5d:01:02 ethertype IPv4 (0x0800), length 60: (tos 0x0, ttl 64, id 0, - // offset 0, flags [DF], proto TCP (6), length 40) - // 172.18.0.2.25022 > 93.93.1.5.36363: Flags [R.], cksum 0x609d (correct), seq 0, ack 1, win 0, length 0 - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(hostNetworkPort)) + "/hostname", "", true - }), - ginkgo.Entry("UDN pod to a different node should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podsNetA[0] - // podsNetA[0] and podsNetA[2] are on different nodes so we can pick the node of podsNetA[2] as the different node destination - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podsNetA[2].Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := 
getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } + ginkgo.Entry("pod to pod connectivity on different networks and different nodes", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // podsNetA[0] and podNetB are on different nodes + clientPod := podsNetA[0] + srvPod := podNetB - clientNode, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - clientNodeIPv4, clientNodeIPv6 := getNodeAddresses(clientNode) - clientNodeIP := clientNodeIPv4 - if ipFamily == utilnet.IPv6 { - clientNodeIP = clientNodeIPv6 - } - // pod -> node traffic should use the node's IP as the source for advertised UDNs. - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(hostNetworkPort)) + "/clientip", clientNodeIP, false - }), - ginkgo.Entry("[ETP=Cluster] UDN pod to the same node nodeport service in default network should not work", - // FIXME: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5410 - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podsNetA[0] - // podsNetA[0] is on nodes[0]. We need the same node. Let's hit the nodeport on nodes[0]. 
- node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[0].Name, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePort := svcNodePortNetDefault.Spec.Ports[0].NodePort - - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", curlConnectionTimeoutCode, true - }), - ginkgo.Entry("[ETP=Cluster] UDN pod to a different node nodeport service in default network should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podsNetA[0] - // podsNetA[0] is on nodes[0]. We need a different node. podNetDefault is on nodes[1]. - // The service is backed by podNetDefault. Let's hit the nodeport on nodes[2]. - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[2].Name, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePort := svcNodePortNetDefault.Spec.Ports[0].NodePort - - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", "", false - }), - ginkgo.Entry("[ETP=Cluster] UDN pod to the same node nodeport service in same UDN network should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podsNetA[0] - // The service is backed by pods in podsNetA. - // We want to hit the nodeport on the same node. - // client is on nodes[0]. Let's hit nodeport on nodes[0]. 
- node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[0].Name, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePort := svcNodePortNetA.Spec.Ports[0].NodePort - - // The service can be backed by any of the pods in podsNetA, so we can't reliably check the output hostname. - // Just check that the connection is successful. - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", "", false - }), - ginkgo.Entry("[ETP=Cluster] UDN pod to a different node nodeport service in same UDN network should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podsNetA[0] - // The service is backed by pods in podsNetA. - // We want to hit the nodeport on a different node. - // client is on nodes[0]. Let's hit nodeport on nodes[2]. - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[2].Name, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePort := svcNodePortNetA.Spec.Ports[0].NodePort - - // sourceIP will be joinSubnetIP for nodeports, so only using hostname endpoint - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", "", false - }), - ginkgo.Entry("[ETP=Cluster] UDN pod to the same node nodeport service in different UDN network should not work", - // FIXME: This test should work: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5419 - // This traffic flow is expected to work eventually but doesn't work today on Layer3 (v4 and v6) and Layer2 (v4 and v6) networks. 
- // Reason it doesn't work today is because UDN networks don't have MAC bindings for masqueradeIPs of other networks. - // Traffic flow: UDN pod in network A -> samenode nodeIP:nodePort service of networkB - // UDN pod in networkA -> ovn-switch -> ovn-cluster-router (SNAT to masqueradeIP of networkA) -> mpX interface -> - // enters the host and hits IPTables rules to DNAT to clusterIP:Port of service of networkB. - // Then it hits the pkt_mark flows on breth0 and get's sent into networkB's patchport where it hits the GR. - // On the GR we DNAT to backend pod and SNAT to joinIP. - // Reply: Pod replies and now OVN in networkB tries to ARP for the masqueradeIP of networkA which is the source and simply - // fails as it doesn't know how to reach this masqueradeIP. - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podsNetA[0] - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[0].Name, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePort := svcNodePortNetB.Spec.Ports[0].NodePort - // sourceIP will be joinSubnetIP for nodeports, so only using hostname endpoint - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", curlConnectionTimeoutCode, true - }), - ginkgo.Entry("[ETP=Cluster] UDN pod to a different node nodeport service in different UDN network should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podsNetA[0] - // The service is backed by podNetB. - // We want to hit the nodeport on a different node from the client. - // client is on nodes[0]. Let's hit nodeport on nodes[2]. 
- node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[2].Name, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePort := svcNodePortNetB.Spec.Ports[0].NodePort - - // sourceIP will be joinSubnetIP for nodeports, so only using hostname endpoint - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", "", false - }), - ginkgo.Entry("[ETP=LOCAL] UDN pod to the same node nodeport service in same UDN network should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podsNetA[0] - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", "", false - }), - - ginkgo.Entry("[ETP=LOCAL] UDN pod to a different node nodeport service in same UDN network should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podsNetA[0] - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podsNetA[2].Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort - out := "" - errBool := false - // FIXME https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5531#issuecomment-3749407414 - // There is a new option on ovn 25.03 
and further called "ct-commit-all" that can be set for each LR. - // This should avoid the mentioned issue. - if IsGatewayModeLocal(f.ClientSet) { - // FIXME: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5846 - // its supposed to fail with 56 error code which is fine - // but due to this fwmark bug it ends up failing wtih 28 error code that's not expected. - out = curlConnectionTimeoutCode - errBool = true - if ipFamily == utilnet.IPv4 || (ipFamily == utilnet.IPv6 && !isIPv4Supported(f.ClientSet)) { - out = curlConnectionResetCode + srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnBTemplate.Name)) + framework.ExpectNoError(err) + var ( + curlOutput string + curlErr bool + ) + if os.Getenv("ADVERTISED_UDN_ISOLATION_MODE") == "loose" { + clientPodStatus, err := getPodAnnotationForAttachment(clientPod, namespacedName(clientPod.Namespace, cudnATemplate.Name)) + framework.ExpectNoError(err) + + curlOutput = getFirstCIDROfFamily(ipFamily, clientPodStatus.IPs).IP.String() + curlErr = false + } else { + curlOutput = curlConnectionTimeoutCode + curlErr = true + } + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", + curlOutput, curlErr + }), + ginkgo.Entry("pod in the default network should not be able to access an advertised UDN pod on the same node", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // podNetDefault and podNetB are on the same node + clientPod := podNetDefault + srvPod := podNetB + + srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnBTemplate.Name)) + framework.ExpectNoError(err) + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", + curlConnectionTimeoutCode, true + }), + 
ginkgo.Entry("pod in the default network should not be able to access an advertised UDN pod on a different node", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // podNetDefault and podsNetA[0] are on different nodes + clientPod := podNetDefault + srvPod := podsNetA[0] + + srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnATemplate.Name)) + framework.ExpectNoError(err) + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", + curlConnectionTimeoutCode, true + }), + ginkgo.Entry("pod in the default network should not be able to access a UDN service", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + return podNetDefault.Name, podNetDefault.Namespace, net.JoinHostPort(getFirstIPStringOfFamily(ipFamily, svcNodePortNetA.Spec.ClusterIPs), "8080") + "/clientip", + curlConnectionTimeoutCode, true + }), + ginkgo.Entry("pod in the UDN should be able to access a service in the same network", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + return podsNetA[0].Name, podsNetA[0].Namespace, net.JoinHostPort(getFirstIPStringOfFamily(ipFamily, svcNodePortNetA.Spec.ClusterIPs), "8080") + "/clientip", "", false + }), + ginkgo.Entry("pod in the UDN should not be able to access a default network service", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + err := true + out := curlConnectionTimeoutCode + if cudnATemplate.Spec.Network.Topology == udnv1.NetworkTopologyLayer2 { + // FIXME: prevent looping of traffic in L2 UDNs + // bad behaviour: packet is looping from management port -> breth0 -> GR -> management port -> 
breth0 and so on + // which is a never ending loop + // this causes curl timeout with code 7 host unreachable instead of code 28 + out = "" + } + return podsNetA[0].Name, podsNetA[0].Namespace, net.JoinHostPort(getFirstIPStringOfFamily(ipFamily, svcNodePortNetDefault.Spec.ClusterIPs), "8080") + "/clientip", out, err + }), + ginkgo.Entry("pod in the UDN should be able to access kapi in default network service", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + return podsNetA[0].Name, podsNetA[0].Namespace, "https://kubernetes.default/healthz", "", false + }), + ginkgo.Entry("pod in the UDN should be able to access kapi service cluster IP directly", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // Get kubernetes service from default namespace + kubernetesService, err := f.ClientSet.CoreV1().Services("default").Get(context.TODO(), "kubernetes", metav1.GetOptions{}) + framework.ExpectNoError(err, "should be able to get kubernetes service") + + // NOTE: See https://github.com/kubernetes/enhancements/tree/master/keps/sig-network/2438-dual-stack-apiserver + // Today the kubernetes.default service is single-stack and cannot be dual-stack. 
+ if isDualStackCluster(nodes) && ipFamily == utilnet.IPv6 { + e2eskipper.Skipf("Dual stack kubernetes.default service is not supported in kubernetes") + } + // Get the cluster IP for the specified IP family + clusterIP := getFirstIPStringOfFamily(ipFamily, kubernetesService.Spec.ClusterIPs) + gomega.Expect(clusterIP).NotTo(gomega.BeEmpty(), fmt.Sprintf("no cluster IP available for IP family %v", ipFamily)) + + // Access the kubernetes API at the cluster IP directly on port 443 + return podsNetA[0].Name, podsNetA[0].Namespace, fmt.Sprintf("https://%s/healthz", net.JoinHostPort(clusterIP, "443")), "", false + }), + ginkgo.Entry("pod in the UDN should not be able to access a service in a different UDN", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + return podsNetA[0].Name, podsNetA[0].Namespace, net.JoinHostPort(getFirstIPStringOfFamily(ipFamily, svcNodePortNetB.Spec.ClusterIPs), "8080") + "/clientip", + curlConnectionTimeoutCode, true + }), + ginkgo.Entry("host to a local UDN pod should not work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientNode := podsNetA[0].Spec.NodeName + srvPod := podsNetA[0] + + srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnATemplate.Name)) + framework.ExpectNoError(err) + return clientNode, "", net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", + curlConnectionTimeoutCode, true + }), + ginkgo.Entry("host to a different node UDN pod should not work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // podsNetA[0] and podsNetA[2] are on different nodes + clientNode := podsNetA[2].Spec.NodeName + srvPod := podsNetA[0] + + srvPodStatus, err := getPodAnnotationForAttachment(srvPod, 
namespacedName(srvPod.Namespace, cudnATemplate.Name)) + framework.ExpectNoError(err) + return clientNode, "", net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", + curlConnectionTimeoutCode, true + }), + ginkgo.Entry("UDN pod to local node should not work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podsNetA[0] + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + // FIXME: add the host process socket to the VRF for this test to work. + // This scenario is something that is not supported yet. So the test will continue to fail. + // This works the same on both normal UDNs and advertised UDNs. + // So because the process is not bound to the VRF, packet reaches the host but kernel sends a RESET. So it's not code 28 but code 7. 
+ // 10:59:55.351067 319594f193d4d_3 P ifindex 191 0a:58:5d:5d:01:05 ethertype IPv4 (0x0800), length 80: (tos 0x0, ttl 64, id 57264, + // offset 0, flags [DF], proto TCP (6), length 60) + // 93.93.1.5.36363 > 172.18.0.2.25022: Flags [S], cksum 0x0aa5 (incorrect -> 0xe0b7), seq 3879759281, win 65280, + // options [mss 1360,sackOK,TS val 3006752321 ecr 0,nop,wscale 7], length 0 + // 10:59:55.352404 ovn-k8s-mp87 In ifindex 186 0a:58:5d:5d:01:01 ethertype IPv4 (0x0800), length 80: (tos 0x0, ttl 63, id 57264, + // offset 0, flags [DF], proto TCP (6), length 60) + // 169.154.169.12.36363 > 172.18.0.2.25022: Flags [S], cksum 0xe0b7 (correct), seq 3879759281, win 65280, + // options [mss 1360,sackOK,TS val 3006752321 ecr 0,nop,wscale 7], length 0 + // 10:59:55.352461 ovn-k8s-mp87 Out ifindex 186 0a:58:5d:5d:01:02 ethertype IPv4 (0x0800), length 60: (tos 0x0, ttl 64, id 0, + // offset 0, flags [DF], proto TCP (6), length 40) + // 172.18.0.2.25022 > 169.154.169.12.36363: Flags [R.], cksum 0x609d (correct), seq 0, ack 3879759282, win 0, length 0 + // 10:59:55.352927 319594f193d4d_3 Out ifindex 191 0a:58:5d:5d:01:02 ethertype IPv4 (0x0800), length 60: (tos 0x0, ttl 64, id 0, + // offset 0, flags [DF], proto TCP (6), length 40) + // 172.18.0.2.25022 > 93.93.1.5.36363: Flags [R.], cksum 0x609d (correct), seq 0, ack 1, win 0, length 0 + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(hostNetworkPort)) + "/hostname", "", true + }), + ginkgo.Entry("UDN pod to a different node should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podsNetA[0] + // podsNetA[0] and podsNetA[2] are on different nodes so we can pick the node of podsNetA[2] as the different node destination + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podsNetA[2].Spec.NodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := 
getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 } - } - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", out, errBool - }), - ginkgo.Entry("[ETP=LOCAL] UDN pod to the same node nodeport service in different UDN network should not work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // FIXME: This test should work: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5419 - clientPod := podNetB - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", curlConnectionTimeoutCode, true - }), - ginkgo.Entry("[ETP=LOCAL] UDN pod to a different node nodeport service in different UDN network should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podNetB - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podsNetA[0].Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort - out := "" - errBool := false - - // FIXME https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5531#issuecomment-3749407414 - // There is a new option on ovn 25.03 and further called "ct-commit-all" that can be set for each LR. - // This should avoid the mentioned issue. 
- if IsGatewayModeLocal(f.ClientSet) { - // FIXME: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5846 - // its supposed to fail with 56 error code which is fine - // but due to this fwmark bug it ends up failing wtih 28 error code that's not expected. - out = curlConnectionTimeoutCode - errBool = true - if ipFamily == utilnet.IPv4 || (ipFamily == utilnet.IPv6 && !isIPv4Supported(f.ClientSet)) { - out = curlConnectionResetCode + clientNode, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + clientNodeIPv4, clientNodeIPv6 := getNodeAddresses(clientNode) + clientNodeIP := clientNodeIPv4 + if ipFamily == utilnet.IPv6 { + clientNodeIP = clientNodeIPv6 } - } - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", out, errBool - }), - ginkgo.Entry("[ETP=LOCAL] UDN pod to the same node nodeport service in default network should not work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // FIXME: This test should work: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5419 - clientPod := podNetB - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePortB := svcNodePortETPLocalDefault.Spec.Ports[0].NodePort - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortB)) + "/hostname", curlConnectionTimeoutCode, true - }), - ginkgo.Entry("[ETP=LOCAL] UDN pod to a different node nodeport service in default network should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // podsNetA[0] is on nodes[0]. 
We need a different node. podNetDefault is on nodes[1]. - // So we hit nodeport on nodes[1]. - clientPod := podsNetA[0] - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podNetDefault.Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePortB := svcNodePortETPLocalDefault.Spec.Ports[0].NodePort - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortB)) + "/hostname", "", false - }), - ginkgo.Entry("[ETP=LOCAL] Default network pod to same node nodeport service in UDN network should not work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // pod -> node traffic should use the node's IP as the source for advertised UDNs. + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(hostNetworkPort)) + "/clientip", clientNodeIP, false + }), + ginkgo.Entry("[ETP=Cluster] UDN pod to the same node nodeport service in default network should not work", + // FIXME: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5410 + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podsNetA[0] + // podsNetA[0] is on nodes[0]. We need the same node. Let's hit the nodeport on nodes[0]. 
+ node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[0].Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePort := svcNodePortNetDefault.Spec.Ports[0].NodePort + + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", curlConnectionTimeoutCode, true + }), + ginkgo.Entry("[ETP=Cluster] UDN pod to a different node nodeport service in default network should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podsNetA[0] + // podsNetA[0] is on nodes[0]. We need a different node. podNetDefault is on nodes[1]. + // The service is backed by podNetDefault. Let's hit the nodeport on nodes[2]. + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[2].Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePort := svcNodePortNetDefault.Spec.Ports[0].NodePort + + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", "", false + }), + ginkgo.Entry("[ETP=Cluster] UDN pod to the same node nodeport service in same UDN network should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podsNetA[0] + // The service is backed by pods in podsNetA. + // We want to hit the nodeport on the same node. + // client is on nodes[0]. Let's hit nodeport on nodes[0]. 
+ node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[0].Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePort := svcNodePortNetA.Spec.Ports[0].NodePort + + // The service can be backed by any of the pods in podsNetA, so we can't reliably check the output hostname. + // Just check that the connection is successful. + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", "", false + }), + ginkgo.Entry("[ETP=Cluster] UDN pod to a different node nodeport service in same UDN network should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podsNetA[0] + // The service is backed by pods in podsNetA. + // We want to hit the nodeport on a different node. + // client is on nodes[0]. Let's hit nodeport on nodes[2]. 
+ node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[2].Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePort := svcNodePortNetA.Spec.Ports[0].NodePort + + // sourceIP will be joinSubnetIP for nodeports, so only using hostname endpoint + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", "", false + }), + ginkgo.Entry("[ETP=Cluster] UDN pod to the same node nodeport service in different UDN network should not work", // FIXME: This test should work: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5419 - clientPod := podNetDefault - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", curlConnectionTimeoutCode, true - }), - ginkgo.Entry("[ETP=LOCAL] Default network pod to different node nodeport service in UDN network should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // podNetDefault is on nodes[1]. We need a different node. podsNetA[0] is on nodes[0]. - // So we hit nodeport on nodes[0]. 
- clientPod := podNetDefault - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podsNetA[0].Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort - out := "" - errBool := false - - // FIXME https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5531#issuecomment-3749407414 - // There is a new option on ovn 25.03 and further called "ct-commit-all" that can be set for each LR. - // This should avoid the mentioned issue. - if IsGatewayModeLocal(f.ClientSet) { - // FIXME: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5846 - // its supposed to fail with 56 error code which is fine - // but due to this fwmark bug it ends up failing wtih 28 error code that's not expected. - out = curlConnectionTimeoutCode - errBool = true - if ipFamily == utilnet.IPv4 || (ipFamily == utilnet.IPv6 && !isIPv4Supported(f.ClientSet)) { - out = curlConnectionResetCode + // This traffic flow is expected to work eventually but doesn't work today on Layer3 (v4 and v6) and Layer2 (v4 and v6) networks. + // Reason it doesn't work today is because UDN networks don't have MAC bindings for masqueradeIPs of other networks. + // Traffic flow: UDN pod in network A -> samenode nodeIP:nodePort service of networkB + // UDN pod in networkA -> ovn-switch -> ovn-cluster-router (SNAT to masqueradeIP of networkA) -> mpX interface -> + // enters the host and hits IPTables rules to DNAT to clusterIP:Port of service of networkB. + // Then it hits the pkt_mark flows on breth0 and gets sent into networkB's patchport where it hits the GR. + // On the GR we DNAT to backend pod and SNAT to joinIP. + // Reply: Pod replies and now OVN in networkB tries to ARP for the masqueradeIP of networkA which is the source and simply + // fails as it doesn't know how to reach this masqueradeIP. 
+ func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podsNetA[0] + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[0].Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 } - } - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", out, errBool - }), - ) + nodePort := svcNodePortNetB.Spec.Ports[0].NodePort + // sourceIP will be joinSubnetIP for nodeports, so only using hostname endpoint + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", curlConnectionTimeoutCode, true + }), + ginkgo.Entry("[ETP=Cluster] UDN pod to a different node nodeport service in different UDN network should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podsNetA[0] + // The service is backed by podNetB. + // We want to hit the nodeport on a different node from the client. + // client is on nodes[0]. Let's hit nodeport on nodes[2]. 
+ node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[2].Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePort := svcNodePortNetB.Spec.Ports[0].NodePort + + // sourceIP will be joinSubnetIP for nodeports, so only using hostname endpoint + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", "", false + }), + ginkgo.Entry("[ETP=LOCAL] UDN pod to the same node nodeport service in same UDN network should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podsNetA[0] + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", "", false + }), + + ginkgo.Entry("[ETP=LOCAL] UDN pod to a different node nodeport service in same UDN network should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podsNetA[0] + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podsNetA[2].Spec.NodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort + out := "" + errBool := false + // FIXME https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5531#issuecomment-3749407414 + // There is a new option on ovn 25.03 
and further called "ct-commit-all" that can be set for each LR. + // This should avoid the mentioned issue. + if IsGatewayModeLocal(f.ClientSet) { + // FIXME: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5846 + // it's supposed to fail with 56 error code which is fine + // but due to this fwmark bug it ends up failing with 28 error code that's not expected. + out = curlConnectionTimeoutCode + errBool = true + if ipFamily == utilnet.IPv4 || (ipFamily == utilnet.IPv6 && !isIPv4Supported(f.ClientSet)) { + out = curlConnectionResetCode + } + } + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", out, errBool + }), + + ginkgo.Entry("[ETP=LOCAL] UDN pod to the same node nodeport service in different UDN network should not work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // FIXME: This test should work: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5419 + clientPod := podNetB + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", curlConnectionTimeoutCode, true + }), + ginkgo.Entry("[ETP=LOCAL] UDN pod to a different node nodeport service in different UDN network should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podNetB + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podsNetA[0].Spec.NodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 
+ if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort + out := "" + errBool := false + + // FIXME https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5531#issuecomment-3749407414 + // There is a new option on ovn 25.03 and further called "ct-commit-all" that can be set for each LR. + // This should avoid the mentioned issue. + if IsGatewayModeLocal(f.ClientSet) { + // FIXME: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5846 + // it's supposed to fail with 56 error code which is fine + // but due to this fwmark bug it ends up failing with 28 error code that's not expected. + out = curlConnectionTimeoutCode + errBool = true + if ipFamily == utilnet.IPv4 || (ipFamily == utilnet.IPv6 && !isIPv4Supported(f.ClientSet)) { + out = curlConnectionResetCode + } + } + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", out, errBool + }), + ginkgo.Entry("[ETP=LOCAL] UDN pod to the same node nodeport service in default network should not work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // FIXME: This test should work: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5419 + clientPod := podNetB + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePortB := svcNodePortETPLocalDefault.Spec.Ports[0].NodePort + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortB)) + "/hostname", curlConnectionTimeoutCode, true + }), + ginkgo.Entry("[ETP=LOCAL] UDN pod to a different node nodeport service in default network should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst 
string, expectedOutput string, expectErr bool) { + // podsNetA[0] is on nodes[0]. We need a different node. podNetDefault is on nodes[1]. + // So we hit nodeport on nodes[1]. + clientPod := podsNetA[0] + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podNetDefault.Spec.NodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePortB := svcNodePortETPLocalDefault.Spec.Ports[0].NodePort + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortB)) + "/hostname", "", false + }), + ginkgo.Entry("[ETP=LOCAL] Default network pod to same node nodeport service in UDN network should not work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // FIXME: This test should work: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5419 + clientPod := podNetDefault + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", curlConnectionTimeoutCode, true + }), + ginkgo.Entry("[ETP=LOCAL] Default network pod to different node nodeport service in UDN network should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // podNetDefault is on nodes[1]. We need a different node. podsNetA[0] is on nodes[0]. + // So we hit nodeport on nodes[0]. 
+ clientPod := podNetDefault + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podsNetA[0].Spec.NodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort + out := "" + errBool := false + + // FIXME https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5531#issuecomment-3749407414 + // There is a new option on ovn 25.03 and further called "ct-commit-all" that can be set for each LR. + // This should avoid the mentioned issue. + if IsGatewayModeLocal(f.ClientSet) { + // FIXME: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5846 + // it's supposed to fail with 56 error code which is fine + // but due to this fwmark bug it ends up failing with 28 error code that's not expected. + out = curlConnectionTimeoutCode + errBool = true + if ipFamily == utilnet.IPv4 || (ipFamily == utilnet.IPv6 && !isIPv4Supported(f.ClientSet)) { + out = curlConnectionResetCode + } + } + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", out, errBool + }), + ) + }) }, ginkgo.Entry("Layer3", From d1daa85d8447ed59d250869aac1e8550ad23de59 Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Wed, 18 Feb 2026 10:30:50 -0500 Subject: [PATCH 32/59] E2E: Fixes flaking ANP ACL logging The test was re-using the same namespace name, but would not wait for namespace deletion before starting the next test. This would cause the next test to fail because it cannot create the namespace while it is terminating. Change the test to use random namespace names. 
Fixes: #5993 Signed-off-by: Tim Rozet --- test/e2e/acl_logging.go | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/test/e2e/acl_logging.go b/test/e2e/acl_logging.go index c5c129769b..f07a81e3c4 100644 --- a/test/e2e/acl_logging.go +++ b/test/e2e/acl_logging.go @@ -190,18 +190,20 @@ var _ = Describe("ACL Logging for AdminNetworkPolicy and BaselineAdminNetworkPol nsNames [4]string ) BeforeEach(func() { + nsNames[0] = fr.Namespace.Name + suffix := framework.RandomSuffix() + nsNames[1] = fmt.Sprintf("anp-peer-restricted-%s", suffix) + nsNames[2] = fmt.Sprintf("anp-peer-open-%s", suffix) + nsNames[3] = fmt.Sprintf("anp-peer-unknown-%s", suffix) + By("creating an admin network policy") - err := makeAdminNetworkPolicy(anpName, "10", fr.Namespace.Name) + err := makeAdminNetworkPolicy(anpName, "10", fr.Namespace.Name, nsNames[1], nsNames[2], nsNames[3]) Expect(err).NotTo(HaveOccurred()) By("configuring the ACL logging level for the ANP") Expect(setANPACLLogSeverity(anpName, initialDenyACLSeverity, initialAllowACLSeverity, initialPassACLSeverity)).To(Succeed()) By("creating peer namespaces that are selected by the admin network policy") - nsNames[0] = fr.Namespace.Name - nsNames[1] = "anp-peer-restricted" - nsNames[2] = "anp-peer-open" - nsNames[3] = "anp-peer-unknown" for _, ns := range nsNames[1:] { _, err = e2ekubectl.RunKubectl("default", "create", "ns", ns) Expect(err).NotTo(HaveOccurred()) @@ -309,7 +311,7 @@ var _ = Describe("ACL Logging for AdminNetworkPolicy and BaselineAdminNetworkPol }, maxPokeRetries*pokeInterval, pokeInterval).Should(BeTrue()) By("creating a baseline admin network policy") - err = makeBaselineAdminNetworkPolicy(fr.Namespace.Name) + err = makeBaselineAdminNetworkPolicy(fr.Namespace.Name, nsNames[1], nsNames[3]) Expect(err).NotTo(HaveOccurred()) By("configuring the ACL logging level for the BANP") @@ -956,7 +958,7 @@ func makeDenyAllPolicy(f *framework.Framework, ns string, policyName string) (*k 
return f.ClientSet.NetworkingV1().NetworkPolicies(ns).Create(context.TODO(), policy, metav1.CreateOptions{}) } -func makeAdminNetworkPolicy(anpName, priority, anpSubjectNS string) error { +func makeAdminNetworkPolicy(anpName, priority, anpSubjectNS, restrictedPeerNS, openPeerNS, unknownPeerNS string) error { anpYaml := "anp.yaml" var anpConfig = fmt.Sprintf(`apiVersion: policy.networking.k8s.io/v1alpha1 kind: AdminNetworkPolicy @@ -974,20 +976,20 @@ spec: to: - namespaces: matchLabels: - kubernetes.io/metadata.name: anp-peer-restricted + kubernetes.io/metadata.name: %s - name: "deny-to-open" action: "Deny" to: - namespaces: matchLabels: - kubernetes.io/metadata.name: anp-peer-open + kubernetes.io/metadata.name: %s - name: "pass-to-unknown" action: "Pass" to: - namespaces: matchLabels: - kubernetes.io/metadata.name: anp-peer-unknown -`, anpName, priority, anpSubjectNS) + kubernetes.io/metadata.name: %s +`, anpName, priority, anpSubjectNS, restrictedPeerNS, openPeerNS, unknownPeerNS) if err := os.WriteFile(anpYaml, []byte(anpConfig), 0644); err != nil { framework.Failf("Unable to write CRD config to disk: %v", err) @@ -1003,7 +1005,7 @@ spec: return err } -func makeBaselineAdminNetworkPolicy(banpSubjectNS string) error { +func makeBaselineAdminNetworkPolicy(banpSubjectNS, restrictedPeerNS, unknownPeerNS string) error { banpYaml := "banp.yaml" var banpConfig = fmt.Sprintf(`apiVersion: policy.networking.k8s.io/v1alpha1 kind: BaselineAdminNetworkPolicy @@ -1020,14 +1022,14 @@ spec: to: - namespaces: matchLabels: - kubernetes.io/metadata.name: anp-peer-restricted + kubernetes.io/metadata.name: %s - name: "deny-to-unknown" action: "Deny" to: - namespaces: matchLabels: - kubernetes.io/metadata.name: anp-peer-unknown -`, banpSubjectNS) + kubernetes.io/metadata.name: %s +`, banpSubjectNS, restrictedPeerNS, unknownPeerNS) if err := os.WriteFile(banpYaml, []byte(banpConfig), 0644); err != nil { framework.Failf("Unable to write CRD config to disk: %v", err) From 
9caab0e4575959d522758f673224583438bb1acb Mon Sep 17 00:00:00 2001 From: Yun Zhou Date: Fri, 6 Feb 2026 11:35:40 -0800 Subject: [PATCH 33/59] cleanup logical entities for stale primary networks Signed-off-by: Yun Zhou --- .../controllermanager/controller_manager.go | 58 ++++++++++------ .../base_network_controller_user_defined.go | 69 +++++++++++++++++++ go-controller/pkg/ovn/gateway.go | 31 +++++++++ .../layer2_user_defined_network_controller.go | 23 +++++-- .../layer3_user_defined_network_controller.go | 32 +++++++-- 5 files changed, 178 insertions(+), 35 deletions(-) diff --git a/go-controller/pkg/controllermanager/controller_manager.go b/go-controller/pkg/controllermanager/controller_manager.go index 61a342f77a..eac0c84651 100644 --- a/go-controller/pkg/controllermanager/controller_manager.go +++ b/go-controller/pkg/controllermanager/controller_manager.go @@ -92,15 +92,17 @@ func (cm *ControllerManager) NewNetworkController(nInfo util.NetInfo) (networkma return nil, fmt.Errorf("topology type %s not supported", topoType) } -// newDummyNetworkController creates a dummy network controller used to clean up specific network -func (cm *ControllerManager) newDummyNetworkController(topoType, netName string) (networkmanager.NetworkController, error) { +// newDummyNetworkController creates a dummy network controller used to clean up specific network. +// role is the NetworkRoleExternalID from stale OVN entities (e.g. "primary" or "secondary") so that +// the dummy's netInfo.IsPrimaryNetwork() is correct for Layer2 gateway cleanup. +func (cm *ControllerManager) newDummyNetworkController(topoType, netName, role string) (networkmanager.NetworkController, error) { // Pass a shallow clone of the watch factory, this allows multiplexing // informers for user-defined Networks. 
cnci, err := cm.newCommonNetworkControllerInfo(cm.watchFactory.ShallowClone()) if err != nil { return nil, fmt.Errorf("failed to create network controller info %w", err) } - netInfo, _ := util.NewNetInfo(&ovncnitypes.NetConf{NetConf: types.NetConf{Name: netName}, Topology: topoType}) + netInfo, _ := util.NewNetInfo(&ovncnitypes.NetConf{NetConf: types.NetConf{Name: netName}, Topology: topoType, Role: role}) switch topoType { case ovntypes.Layer3Topology: return ovn.NewLayer3UserDefinedNetworkController(cnci, netInfo, cm.networkManager.Interface(), cm.routeImportManager, cm.eIPController, cm.portCache) @@ -112,33 +114,38 @@ func (cm *ControllerManager) newDummyNetworkController(topoType, netName string) return nil, fmt.Errorf("topology type %s not supported", topoType) } -// Find all the OVN logical switches/routers for the secondary networks -func findAllSecondaryNetworkLogicalEntities(nbClient libovsdbclient.Client) ([]*nbdb.LogicalSwitch, +// findAllUserDefinedNetworkLogicalEntities returns all OVN logical switches and +// routers that belong to user-defined networks (primary or secondary). Same +// predicate as original: entities have NetworkExternalID and NetworkRoleExternalID +// (TopologyExternalID always co-exists with NetworkExternalID per CleanupStaleNetworks). +// Caller reads role and topoType from entity ExternalIDs for dummy controller creation. +// Used on controller restart to remove stale entities for deleted UDNs. 
+func findAllUserDefinedNetworkLogicalEntities(nbClient libovsdbclient.Client) ([]*nbdb.LogicalSwitch, []*nbdb.LogicalRouter, error) { - belongsToSecondaryNetwork := func(externalIDs map[string]string) bool { + belongsToUserDefinedNetwork := func(externalIDs map[string]string) bool { _, hasNetworkExternalID := externalIDs[ovntypes.NetworkExternalID] - networkRole, hasNetworkRoleExternalID := externalIDs[ovntypes.NetworkRoleExternalID] - return hasNetworkExternalID && hasNetworkRoleExternalID && networkRole == ovntypes.NetworkRoleSecondary + _, hasNetworkRoleExternalID := externalIDs[ovntypes.NetworkRoleExternalID] + return hasNetworkExternalID && hasNetworkRoleExternalID } p1 := func(item *nbdb.LogicalSwitch) bool { - return belongsToSecondaryNetwork(item.ExternalIDs) + return belongsToUserDefinedNetwork(item.ExternalIDs) } - nodeSwitches, err := libovsdbops.FindLogicalSwitchesWithPredicate(nbClient, p1) + switches, err := libovsdbops.FindLogicalSwitchesWithPredicate(nbClient, p1) if err != nil { - klog.Errorf("Failed to get all logical switches of secondary network error: %v", err) + klog.Errorf("Failed to get all logical switches of user-defined networks: %v", err) return nil, nil, err } p2 := func(item *nbdb.LogicalRouter) bool { - return belongsToSecondaryNetwork(item.ExternalIDs) + return belongsToUserDefinedNetwork(item.ExternalIDs) } - clusterRouters, err := libovsdbops.FindLogicalRoutersWithPredicate(nbClient, p2) + routers, err := libovsdbops.FindLogicalRoutersWithPredicate(nbClient, p2) if err != nil { - klog.Errorf("Failed to get all distributed logical routers: %v", err) + klog.Errorf("Failed to get all logical routers of user-defined networks: %v", err) return nil, nil, err } - return nodeSwitches, clusterRouters, nil + return switches, routers, nil } func (cm *ControllerManager) GetDefaultNetworkController() networkmanager.ReconcilableNetworkController { @@ -155,8 +162,9 @@ func (cm *ControllerManager) CleanupStaleNetworks(validNetworks 
...util.NetInfo) } } - // Get all the existing secondary networks and its logical entities - switches, routers, err := findAllSecondaryNetworkLogicalEntities(cm.nbClient) + // Get all the existing user-defined network logical entities (primary and secondary). + // For a given network, all switches/routers have the same role external ID (primary or secondary). + switches, routers, err := findAllUserDefinedNetworkLogicalEntities(cm.nbClient) if err != nil { return err } @@ -170,11 +178,15 @@ func (cm *ControllerManager) CleanupStaleNetworks(validNetworks ...util.NetInfo) // network still exists, no cleanup to do continue } + role := ls.ExternalIDs[ovntypes.NetworkRoleExternalID] + if _, ok := staleNetworkControllers[netName]; ok { + // already have a dummy controller for this network (from an earlier entity) + continue + } // Create dummy network controllers to clean up logical entities klog.V(5).Infof("Found stale %s network %s", topoType, netName) - if oc, err := cm.newDummyNetworkController(topoType, netName); err == nil { + if oc, err := cm.newDummyNetworkController(topoType, netName, role); err == nil { staleNetworkControllers[netName] = oc - continue } } for _, lr := range routers { @@ -185,11 +197,15 @@ func (cm *ControllerManager) CleanupStaleNetworks(validNetworks ...util.NetInfo) // network still exists, no cleanup to do continue } + role := lr.ExternalIDs[ovntypes.NetworkRoleExternalID] + if _, ok := staleNetworkControllers[netName]; ok { + // already have a dummy controller for this network (from an earlier entity) + continue + } // Create dummy network controllers to clean up logical entities klog.V(5).Infof("Found stale %s network %s", topoType, netName) - if oc, err := cm.newDummyNetworkController(topoType, netName); err == nil { + if oc, err := cm.newDummyNetworkController(topoType, netName, role); err == nil { staleNetworkControllers[netName] = oc - continue } } diff --git a/go-controller/pkg/ovn/base_network_controller_user_defined.go 
b/go-controller/pkg/ovn/base_network_controller_user_defined.go index 38fae086bd..c5481ef265 100644 --- a/go-controller/pkg/ovn/base_network_controller_user_defined.go +++ b/go-controller/pkg/ovn/base_network_controller_user_defined.go @@ -819,6 +819,75 @@ func (bsnc *BaseUserDefinedNetworkController) WatchMultiNetworkPolicy() error { return nil } +// cleanupGatewayRoutersForNetworkFromDB discovers all gateway routers for the given network from +// the NB DB (by ExternalIDs and GWRouterPrefix) and cleans each one via a dummy GatewayManager. +// Used when gateway managers are empty (e.g. dummy controller or stale cleanup) so cleanup works +// even when nodes are gone. +func cleanupGatewayRoutersForNetworkFromDB( + nbClient libovsdbclient.Client, + netInfo util.NetInfo, + clusterRouterName, joinSwitchName string, +) error { + var errs []error + networkName := netInfo.GetNetworkName() + pred := func(lr *nbdb.LogicalRouter) bool { + return lr.ExternalIDs[types.NetworkExternalID] == networkName && + strings.HasPrefix(lr.Name, types.GWRouterPrefix) + } + routers, err := libovsdbops.FindLogicalRoutersWithPredicate(nbClient, pred) + if err != nil { + return fmt.Errorf("failed to find gateway routers for network %s: %w", networkName, err) + } + layer2UseTransitRouter := netInfo.TopologyType() == types.Layer2Topology && config.Layer2UsesTransitRouter + for _, lr := range routers { + nodeName := netInfo.RemoveNetworkScopeFromName(util.GetWorkerFromGatewayRouter(lr.Name)) + gw := NewGatewayManagerForCleanup(nbClient, netInfo, clusterRouterName, joinSwitchName, lr.Name, nodeName, layer2UseTransitRouter) + if err := gw.Cleanup(); err != nil { + errs = append(errs, fmt.Errorf("failed to cleanup gateway router %s for network %q (node %s): %w", lr.Name, networkName, nodeName, err)) + } + } + return utilerrors.Join(errs...) +} + +// cleanupLoadBalancerGroups removes load balancer groups for a user-defined network controller. 
+// When LB group UUIDs are known (normal controller), they are deleted directly by UUID. +// Otherwise (dummy/stale cleanup controller), the groups are looked up by network-scoped name. +func cleanupLoadBalancerGroups( + nbClient libovsdbclient.Client, + netInfo util.NetInfo, + switchLBGroupUUID, clusterLBGroupUUID, routerLBGroupUUID string, +) { + networkName := netInfo.GetNetworkName() + if switchLBGroupUUID != "" || clusterLBGroupUUID != "" || routerLBGroupUUID != "" { + lbGroups := make([]*nbdb.LoadBalancerGroup, 0, 3) + for _, lbGroupUUID := range []string{switchLBGroupUUID, clusterLBGroupUUID, routerLBGroupUUID} { + if lbGroupUUID != "" { + lbGroups = append(lbGroups, &nbdb.LoadBalancerGroup{UUID: lbGroupUUID}) + } + } + if err := libovsdbops.DeleteLoadBalancerGroups(nbClient, lbGroups); err != nil { + klog.Errorf("Failed to delete load balancer groups on network: %q, error: %v", networkName, err) + } + return + } + // Dummy controller (e.g. stale UDN cleanup): find LB groups by network-scoped name and delete them + names := map[string]bool{ + netInfo.GetNetworkScopedLoadBalancerGroupName(types.ClusterLBGroupName): true, + netInfo.GetNetworkScopedLoadBalancerGroupName(types.ClusterSwitchLBGroupName): true, + netInfo.GetNetworkScopedLoadBalancerGroupName(types.ClusterRouterLBGroupName): true, + } + staleLBGroups, err := libovsdbops.FindLoadBalancerGroupsWithPredicate(nbClient, func(g *nbdb.LoadBalancerGroup) bool { + return names[g.Name] + }) + if err != nil { + klog.Errorf("Failed to find load balancer groups for stale network %q: %v", networkName, err) + } else if len(staleLBGroups) > 0 { + if err := libovsdbops.DeleteLoadBalancerGroups(nbClient, staleLBGroups); err != nil { + klog.Errorf("Failed to delete load balancer groups on stale network: %q, error: %v", networkName, err) + } + } +} + // cleanupPolicyLogicalEntities cleans up all the port groups and address sets that belong to the given controller func cleanupPolicyLogicalEntities(nbClient 
libovsdbclient.Client, ops []ovsdb.Operation, controllerName string) ([]ovsdb.Operation, error) { var err error diff --git a/go-controller/pkg/ovn/gateway.go b/go-controller/pkg/ovn/gateway.go index ddce0de5c7..a961d301c5 100644 --- a/go-controller/pkg/ovn/gateway.go +++ b/go-controller/pkg/ovn/gateway.go @@ -1381,6 +1381,37 @@ func (gw *GatewayManager) Cleanup() error { return nil } +// NewGatewayManagerForCleanup returns a minimal GatewayManager used only for Cleanup(). Used when +// discovering gateway routers from the DB (e.g. stale cleanup when nodes are gone). layer2UseTransitRouter +// selects the peer port cleanup path (transit router LRP vs join switch LSP). +// +// NOTE: transitRouterInfo is set to an empty struct (not nil) when layer2UseTransitRouter is true. +// This is safe because Cleanup() only checks (transitRouterInfo != nil) to choose between +// deleteGWRouterPeerRouterPort and deleteGWRouterPeerSwitchPort — neither of which accesses +// transitRouterInfo fields. If Cleanup() is ever changed to dereference transitRouterInfo fields, +// this constructor must be updated accordingly. 
+func NewGatewayManagerForCleanup( + nbClient libovsdbclient.Client, + netInfo util.NetInfo, + clusterRouterName, joinSwitchName, gwRouterName, nodeName string, + layer2UseTransitRouter bool, +) *GatewayManager { + var tri *transitRouterInfo + if layer2UseTransitRouter { + tri = &transitRouterInfo{} + } + return &GatewayManager{ + nodeName: nodeName, + clusterRouterName: clusterRouterName, + gwRouterName: gwRouterName, + extSwitchName: netInfo.GetNetworkScopedExtSwitchName(nodeName), + joinSwitchName: joinSwitchName, + nbClient: nbClient, + netInfo: netInfo, + transitRouterInfo: tri, + } +} + func (gw *GatewayManager) delPbrAndNatRules(nodeName string) { // delete the dnat_and_snat entry that we added for the management port IP // Note: we don't need to delete any MAC bindings that are dynamically learned from OVN SB DB diff --git a/go-controller/pkg/ovn/layer2_user_defined_network_controller.go b/go-controller/pkg/ovn/layer2_user_defined_network_controller.go index f47b0c14a6..3649153b41 100644 --- a/go-controller/pkg/ovn/layer2_user_defined_network_controller.go +++ b/go-controller/pkg/ovn/layer2_user_defined_network_controller.go @@ -495,6 +495,20 @@ func (oc *Layer2UserDefinedNetworkController) run() error { // could be called from a dummy Controller (only has CommonNetworkControllerInfo set) func (oc *Layer2UserDefinedNetworkController) Cleanup() error { networkName := oc.GetNetworkName() + + // For primary Layer2 UDN only: when this is a cleanup-only controller (dummy for stale UDN + // cleanup; GetNetworkID() is InvalidID because netInfo was never reconciled from a NAD), + // discover and cleanup all gateway routers from the NB DB. DB-driven cleanup works even + // when nodes are already gone. 
+ if oc.IsPrimaryNetwork() && oc.GetNetworkID() == types.InvalidID { + if err := cleanupGatewayRoutersForNetworkFromDB(oc.nbClient, oc.GetNetInfo(), + oc.GetNetworkScopedClusterRouterName(), oc.GetNetworkScopedJoinSwitchName()); err != nil { + return fmt.Errorf("failed to cleanup gateway routers for network %s: %w", networkName, err) + } + } + + // Switch that holds management ports is deleted below (BaseLayer2UserDefinedNetworkController.cleanup); + // LSPs are cascade-deleted with the logical switch. if err := oc.BaseLayer2UserDefinedNetworkController.cleanup(); err != nil { return fmt.Errorf("failed to cleanup network %q: %w", networkName, err) } @@ -531,13 +545,8 @@ func (oc *Layer2UserDefinedNetworkController) Cleanup() error { } // remove load balancer groups - lbGroups := make([]*nbdb.LoadBalancerGroup, 0, 3) - for _, lbGroupUUID := range []string{oc.switchLoadBalancerGroupUUID, oc.clusterLoadBalancerGroupUUID, oc.routerLoadBalancerGroupUUID} { - lbGroups = append(lbGroups, &nbdb.LoadBalancerGroup{UUID: lbGroupUUID}) - } - if err := libovsdbops.DeleteLoadBalancerGroups(oc.nbClient, lbGroups); err != nil { - klog.Errorf("Failed to delete load balancer groups on network: %q, error: %v", oc.GetNetworkName(), err) - } + cleanupLoadBalancerGroups(oc.nbClient, oc.GetNetInfo(), + oc.switchLoadBalancerGroupUUID, oc.clusterLoadBalancerGroupUUID, oc.routerLoadBalancerGroupUUID) return nil } diff --git a/go-controller/pkg/ovn/layer3_user_defined_network_controller.go b/go-controller/pkg/ovn/layer3_user_defined_network_controller.go index 57059e296c..b28d1a7ae3 100644 --- a/go-controller/pkg/ovn/layer3_user_defined_network_controller.go +++ b/go-controller/pkg/ovn/layer3_user_defined_network_controller.go @@ -523,6 +523,19 @@ func (oc *Layer3UserDefinedNetworkController) Cleanup() error { // Note : Cluster manager removes the subnet annotation for the node. 
netName := oc.GetNetworkName() klog.Infof("Delete OVN logical entities for %s network controller of network %s", types.Layer3Topology, netName) + + // For primary L3 UDN only: when this is a cleanup-only controller (dummy for stale UDN + // cleanup; GetNetworkID() is InvalidID because netInfo was never reconciled from a NAD), + // discover and cleanup all gateway routers from the NB DB. DB-driven cleanup works even + // when nodes are already gone. + if oc.IsPrimaryNetwork() && oc.GetNetworkID() == types.InvalidID { + if err := cleanupGatewayRoutersForNetworkFromDB(oc.nbClient, oc.GetNetInfo(), + oc.GetNetworkScopedClusterRouterName(), oc.GetNetworkScopedJoinSwitchName()); err != nil { + return fmt.Errorf("failed to cleanup gateway routers for network %s: %w", netName, err) + } + } + + // Node switches (which hold management port LSPs) are deleted below; LSPs are cascade-deleted with the logical switch. // first delete node logical switches ops, err = libovsdbops.DeleteLogicalSwitchesWithPredicateOps(oc.nbClient, ops, func(item *nbdb.LogicalSwitch) bool { @@ -562,6 +575,16 @@ func (oc *Layer3UserDefinedNetworkController) Cleanup() error { return err } + // Delete QoS rows for this network (e.g. from NetworkQoS controller). Applies to primary and + // secondary Layer3 UDNs when EnableNetworkQoS is set. 
+ ops, err = libovsdbops.DeleteQoSesWithPredicateOps(oc.nbClient, ops, + func(item *nbdb.QoS) bool { + return item.ExternalIDs[types.NetworkExternalID] == netName + }) + if err != nil { + return fmt.Errorf("failed to get ops for deleting QoSes of network %s: %v", netName, err) + } + _, err = libovsdbops.TransactAndCheck(oc.nbClient, ops) if err != nil { return fmt.Errorf("failed to deleting routers/switches of network %s: %v", netName, err) @@ -574,13 +597,8 @@ func (oc *Layer3UserDefinedNetworkController) Cleanup() error { } // remove load balancer groups - lbGroups := make([]*nbdb.LoadBalancerGroup, 0, 3) - for _, lbGroupUUID := range []string{oc.switchLoadBalancerGroupUUID, oc.clusterLoadBalancerGroupUUID, oc.routerLoadBalancerGroupUUID} { - lbGroups = append(lbGroups, &nbdb.LoadBalancerGroup{UUID: lbGroupUUID}) - } - if err := libovsdbops.DeleteLoadBalancerGroups(oc.nbClient, lbGroups); err != nil { - klog.Errorf("Failed to delete load balancer groups on network: %q, error: %v", oc.GetNetworkName(), err) - } + cleanupLoadBalancerGroups(oc.nbClient, oc.GetNetInfo(), + oc.switchLoadBalancerGroupUUID, oc.clusterLoadBalancerGroupUUID, oc.routerLoadBalancerGroupUUID) return nil } From 005e15b8fb0fdce2e8ec9464ad596ddf5c63f204 Mon Sep 17 00:00:00 2001 From: Yun Zhou Date: Fri, 6 Feb 2026 19:33:50 -0800 Subject: [PATCH 34/59] add unit tests for stale primary UDN cleanup Signed-off-by: Yun Zhou --- ...r2_user_defined_network_controller_test.go | 89 +++++++++++++++ ...r3_user_defined_network_controller_test.go | 106 ++++++++++++++++++ 2 files changed, 195 insertions(+) diff --git a/go-controller/pkg/ovn/layer2_user_defined_network_controller_test.go b/go-controller/pkg/ovn/layer2_user_defined_network_controller_test.go index 7461784139..32cfaca26b 100644 --- a/go-controller/pkg/ovn/layer2_user_defined_network_controller_test.go +++ b/go-controller/pkg/ovn/layer2_user_defined_network_controller_test.go @@ -483,6 +483,95 @@ var _ = Describe("OVN Multi-Homed pod operations 
for layer 2 network", func() { ), ) + It("primary layer 2 UDN: controller creates entities via init/watchers, then dummy Cleanup() removes them", func() { + config.OVNKubernetesFeature.EnableMultiNetwork = true + setupConfig(dummyLayer2PrimaryUserDefinedNetwork("192.168.0.0/16"), testConfiguration{}, config.GatewayModeShared) + app.Action = func(ctx *cli.Context) error { + netInfo := dummyLayer2PrimaryUserDefinedNetwork("192.168.0.0/16") + netConf := netInfo.netconf() + networkConfig, err := util.NewNetInfo(netConf) + Expect(err).NotTo(HaveOccurred()) + mutableNetInfo := util.NewMutableNetInfo(networkConfig) + mutableNetInfoCleanup := util.NewMutableNetInfo(networkConfig) + mutableNetInfoCleanup.SetNetworkID(ovntypes.InvalidID) + + nad, err := newNetworkAttachmentDefinition(ns, nadName, *netConf) + Expect(err).NotTo(HaveOccurred()) + fakeNetworkManager := &testnm.FakeNetworkManager{ + PrimaryNetworks: map[string]util.NetInfo{}, + } + fakeNetworkManager.PrimaryNetworks[ns] = mutableNetInfo + + const nodeIPv4CIDR = "192.168.126.202/24" + testNode, err := newNodeWithUserDefinedNetworks(nodeName, nodeIPv4CIDR, netInfo) + Expect(err).NotTo(HaveOccurred()) + nbZone := &nbdb.NBGlobal{Name: config.Default.Zone, UUID: config.Default.Zone} + + // Minimal initialDB: no UDN entities. init() + watchers create them. 
+ initialDB.NBData = append(initialDB.NBData, nbZone) + Expect(netInfo.setupOVNDependencies(&initialDB)).To(Succeed()) + + fakeOvn.startWithDBSetup( + initialDB, + &corev1.NamespaceList{Items: []corev1.Namespace{*newUDNNamespace(ns)}}, + &corev1.NodeList{Items: []corev1.Node{*testNode}}, + &corev1.PodList{Items: []corev1.Pod{}}, + &nadapi.NetworkAttachmentDefinitionList{Items: []nadapi.NetworkAttachmentDefinition{*nad}}, + ) + + Expect(fakeOvn.networkManager.Start()).To(Succeed()) + defer fakeOvn.networkManager.Stop() + Expect(fakeOvn.controller.WatchNamespaces()).To(Succeed()) + Expect(fakeOvn.controller.WatchPods()).To(Succeed()) + + // Run init() to create cluster-level entities, then watchers so node sync creates per-node entities. + l2Controller, ok := fakeOvn.fullL2UDNControllers[userDefinedNetworkName] + Expect(ok).To(BeTrue()) + Expect(l2Controller.init()).To(Succeed()) + udnNetController, ok := fakeOvn.userDefinedNetworkControllers[userDefinedNetworkName] + Expect(ok).To(BeTrue()) + udnNetController.bnc.ovnClusterLRPToJoinIfAddrs = dummyJoinIPs() + Expect(l2Controller.WatchNodes()).To(Succeed()) + Expect(l2Controller.WatchPods()).To(Succeed()) + Expect(l2Controller.WatchNetworkPolicy()).To(Succeed()) + + // Wait for the controller to create the Layer2 switch. + udnLSName := l2Controller.GetNetworkScopedSwitchName(ovntypes.OVNLayer2Switch) + Eventually(func(g Gomega) { + switches, err := libovsdbops.FindLogicalSwitchesWithPredicate(fakeOvn.nbClient, func(ls *nbdb.LogicalSwitch) bool { + return ls.Name == udnLSName + }) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(switches).NotTo(BeEmpty()) + }).WithTimeout(10 * time.Second).Should(Succeed()) + + // Assert gateway router was created before cleanup. 
+ udnGWRouterName := l2Controller.GetNetworkScopedGWRouterName(nodeName) + Eventually(func(g Gomega) { + routers, err := libovsdbops.FindLogicalRoutersWithPredicate(fakeOvn.nbClient, func(lr *nbdb.LogicalRouter) bool { + return lr.Name == udnGWRouterName + }) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(routers).NotTo(BeEmpty()) + }).WithTimeout(10 * time.Second).Should(Succeed()) + + // Dummy controller with InvalidID runs Cleanup() to remove all entities for this network. + dummyController, err := NewLayer2UserDefinedNetworkController( + &l2Controller.CommonNetworkControllerInfo, + mutableNetInfoCleanup, + fakeOvn.networkManager.Interface(), + nil, + NewPortCache(ctx.Done()), + nil, + ) + Expect(err).NotTo(HaveOccurred()) + Expect(dummyController.Cleanup()).To(Succeed()) + Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(generateUDNPostInitDB([]libovsdbtest.TestData{nbZone}))) + return nil + } + Expect(app.Run([]string{app.Name})).To(Succeed()) + }) + It("controller should cleanup stale nodes on startup", func() { app.Action = func(*cli.Context) error { netInfo := dummyLayer2PrimaryUserDefinedNetwork("192.168.0.0/16") diff --git a/go-controller/pkg/ovn/layer3_user_defined_network_controller_test.go b/go-controller/pkg/ovn/layer3_user_defined_network_controller_test.go index ed70df467f..caea5164a9 100644 --- a/go-controller/pkg/ovn/layer3_user_defined_network_controller_test.go +++ b/go-controller/pkg/ovn/layer3_user_defined_network_controller_test.go @@ -459,6 +459,112 @@ var _ = Describe("OVN Multi-Homed pod operations for layer 3 network", func() { }), ), ) + + It("primary Layer 3 UDN: controller creates entities via init/watchers, then dummy Cleanup() removes them", func() { + config.OVNKubernetesFeature.EnableMultiNetwork = true + config.OVNKubernetesFeature.EnableNetworkSegmentation = true + netInfo := dummyPrimaryLayer3UserDefinedNetwork("192.168.0.0/16", "192.168.1.0/24") + app.Action = func(ctx *cli.Context) error { + netConf := 
netInfo.netconf() + networkConfig, err := util.NewNetInfo(netConf) + Expect(err).NotTo(HaveOccurred()) + // For cleanup we use a copy with InvalidID so the dummy controller treats the network as stale. + mutableNetInfoCleanup := util.NewMutableNetInfo(networkConfig) + mutableNetInfoCleanup.SetNetworkID(types.InvalidID) + + nad, err := newNetworkAttachmentDefinition(ns, nadName, *netConf) + Expect(err).NotTo(HaveOccurred()) + // Dummy controller only runs Cleanup(), which does not use the network manager; empty fake is enough. + fakeNetworkManager := &networkmanager.FakeNetworkManager{ + PrimaryNetworks: make(map[string]util.NetInfo), + } + + const nodeIPv4CIDR = "192.168.126.202/24" + testNode, err := newNodeWithUserDefinedNetworks(nodeName, nodeIPv4CIDR, netInfo) + Expect(err).NotTo(HaveOccurred()) + + // NB_Global with default zone so GetNBZone returns it; node without zone annotation is treated as local. + nbZone := &nbdb.NBGlobal{Name: types.OvnDefaultZone, UUID: types.OvnDefaultZone} + // Post-cleanup DB: default net node switch + NB_Global + global entities (Copp, meters) as in Layer2 test. + defaultNetExpectations := generateUDNPostInitDB(append(emptyDefaultClusterNetworkNodeSwitch(nodeName), nbZone)) + + // Minimal initialDB: default net node switch, no UDN entities. The UDN controller's Start() + // runs init() which creates cluster router and join switch; then node sync creates per-node entities. + initialDB.NBData = append(initialDB.NBData, nbZone) + Expect(netInfo.setupOVNDependencies(&initialDB)).To(Succeed()) + + fakeOvn.startWithDBSetup( + initialDB, + &corev1.NamespaceList{Items: []corev1.Namespace{*newUDNNamespace(ns)}}, + &corev1.NodeList{Items: []corev1.Node{*testNode}}, + &corev1.PodList{Items: []corev1.Pod{}}, + &nadapi.NetworkAttachmentDefinitionList{Items: []nadapi.NetworkAttachmentDefinition{*nad}}, + ) + + // Mock ovn-nbctl list Load_Balancer_Group (used by UDN controller init; default controller init is not run in this test). 
+ fexec := util.GetExec().(*testing.FakeExec) + fexec.AddFakeCmdsNoOutputNoError([]string{ + "ovn-nbctl --timeout=15 --columns=_uuid list Load_Balancer_Group", + }) + + // networkManager is already started by startWithDBSetup (via init()) and stopped by AfterEach (shutdown). + Expect(fakeOvn.controller.WatchNamespaces()).To(Succeed()) + Expect(fakeOvn.controller.WatchPods()).To(Succeed()) + + // Run init() to create cluster-level entities (cluster router, join switch, LB groups, etc.), + // then start watchers so node sync creates per-node entities (node LS, GW router, etc.). + l3Controller, ok := fakeOvn.fullL3UDNControllers[userDefinedNetworkName] + Expect(ok).To(BeTrue()) + Expect(l3Controller.init()).To(Succeed()) + Expect(l3Controller.WatchNodes()).To(Succeed()) + Expect(l3Controller.WatchPods()).To(Succeed()) + Expect(l3Controller.WatchNetworkPolicy()).To(Succeed()) + + // Wait for the controller to create UDN entities: assert any switches and routers exist with this network's external-ids, + // and that the gateway router for this node exists. 
+ networkName := networkConfig.GetNetworkName() + gwRouterName := networkConfig.GetNetworkScopedGWRouterName(nodeName) + Eventually(func(g Gomega) { + switches, err := libovsdbops.FindLogicalSwitchesWithPredicate(fakeOvn.nbClient, func(ls *nbdb.LogicalSwitch) bool { + return ls.ExternalIDs != nil && ls.ExternalIDs[types.NetworkExternalID] == networkName + }) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(switches).NotTo(BeEmpty(), "at least one LogicalSwitch for network %q should exist", networkName) + }).WithTimeout(10 * time.Second).Should(Succeed()) + Eventually(func(g Gomega) { + routers, err := libovsdbops.FindLogicalRoutersWithPredicate(fakeOvn.nbClient, func(lr *nbdb.LogicalRouter) bool { + return lr.ExternalIDs != nil && lr.ExternalIDs[types.NetworkExternalID] == networkName + }) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(routers).NotTo(BeEmpty(), "at least one LogicalRouter for network %q should exist", networkName) + }).WithTimeout(10 * time.Second).Should(Succeed()) + Eventually(func(g Gomega) { + routers, err := libovsdbops.FindLogicalRoutersWithPredicate(fakeOvn.nbClient, func(lr *nbdb.LogicalRouter) bool { + return lr.Name == gwRouterName + }) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(routers).NotTo(BeEmpty(), "gateway router %q should exist", gwRouterName) + }).WithTimeout(10 * time.Second).Should(Succeed()) + + // Do NOT delete the NAD. Simulate CleanupStaleNetworks(no valid networks): dummy controller + // with InvalidID runs Cleanup() so our network is treated as stale and all its entities are removed. 
+ dummyController, err := NewLayer3UserDefinedNetworkController( + &l3Controller.CommonNetworkControllerInfo, + mutableNetInfoCleanup, + fakeNetworkManager, + nil, + nil, + NewPortCache(ctx.Done()), + ) + Expect(err).NotTo(HaveOccurred()) + Expect(dummyController.Cleanup()).To(Succeed()) + + Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(defaultNetExpectations)) + return nil + } + Expect(app.Run([]string{app.Name})).To(Succeed()) + }) + Describe("Dynamic UDN allocation with remote node", func() { It("activates a remote node when a NAD becomes active and cleans it up when inactive", func() { Expect(config.PrepareTestConfig()).To(Succeed()) From 913755316dcf13af5ef70d6778ad8b604bdb771f Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Sun, 15 Feb 2026 10:49:01 -0500 Subject: [PATCH 35/59] Reduce memory allocation by OpenFlow Manager It was seen during a scale run that there is significant memory consumption used by OpenFlow Manager: Showing nodes accounting for 2995262.20MB, 90.52% of 3308836.28MB total Dropped 2093 nodes (cum <= 16544.18MB) flat flat% sum% cum cum% 0 0% 0% 1337234.51MB 40.41% github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node.(*openflowManager).Run.func1 289192.79MB 8.74% 8.74% 1335678.47MB 40.37% github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node.(*openflowManager).syncFlows 6MB 0.00018% 8.74% 1046400.59MB 31.62% github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util.ReplaceOFFlows 0 0% 8.74% 1041733.16MB 31.48% github.com/ovn-org/ovn-kubernetes/go-controller/pkg/retry.(*RetryFramework).DoWithLock 599.04MB 0.018% 8.76% 709890.84MB 21.45% bytes.(*Buffer).grow 709291.80MB 21.44% 30.19% 709291.80MB 21.44% bytes.growSlice 0 0% 30.19% 707386.34MB 21.38% bytes.(*Buffer).Write The per cycle allocation is high due to: 1. Flows were being appended to a slice causing underlying array copying. 2. strings.Join was being used to create stdin with flows, causing giant string creation. 3. 
Output from 2 was being copied into a giant byte slice. 4. This byte slice was then being copied again into a buffer for stdin consumption. This commit fixes it by: 1. Flattening flows with pre-allocated slice capacity. 2. Creating an io reader that is fed directly into stdin. This way we can avoid having to build a giant byte slice and copy around strings. The stdin reader can just read as many bytes as it needs to directly from a struct holding the flow slice. Signed-off-by: Tim Rozet --- go-controller/pkg/node/openflow_manager.go | 36 +++++++------ go-controller/pkg/util/ovs.go | 63 +++++++++++++++++++++- go-controller/pkg/util/ovs_unit_test.go | 44 ++++++++++++++- 3 files changed, 124 insertions(+), 19 deletions(-) diff --git a/go-controller/pkg/node/openflow_manager.go b/go-controller/pkg/node/openflow_manager.go index b55fff21cd..d2dc2eb82f 100644 --- a/go-controller/pkg/node/openflow_manager.go +++ b/go-controller/pkg/node/openflow_manager.go @@ -119,34 +119,40 @@ func (c *openflowManager) requestFlowSync() { func (c *openflowManager) syncFlows() { c.flowMutex.Lock() - defer c.flowMutex.Unlock() - - flows := []string{} - for _, entry := range c.flowCache { - flows = append(flows, entry...) - } + flows := flattenFlowCacheEntries(c.flowCache) + c.flowMutex.Unlock() _, stderr, err := util.ReplaceOFFlows(c.defaultBridge.GetBridgeName(), flows) if err != nil { - klog.Errorf("Failed to add flows, error: %v, stderr, %s, flows: %s", err, stderr, c.flowCache) + klog.Errorf("Failed to add flows for bridge %s, error: %v, stderr, %s, flow count: %d", + c.defaultBridge.GetBridgeName(), err, stderr, len(flows)) } if c.externalGatewayBridge != nil { c.exGWFlowMutex.Lock() - defer c.exGWFlowMutex.Unlock() - - flows := []string{} - for _, entry := range c.exGWFlowCache { - flows = append(flows, entry...) 
- } + exGWFlows := flattenFlowCacheEntries(c.exGWFlowCache) + c.exGWFlowMutex.Unlock() - _, stderr, err := util.ReplaceOFFlows(c.externalGatewayBridge.GetBridgeName(), flows) + _, stderr, err := util.ReplaceOFFlows(c.externalGatewayBridge.GetBridgeName(), exGWFlows) if err != nil { - klog.Errorf("Failed to add flows, error: %v, stderr, %s, flows: %s", err, stderr, c.exGWFlowCache) + klog.Errorf("Failed to add flows for bridge %s, error: %v, stderr, %s, flow count: %d", + c.externalGatewayBridge.GetBridgeName(), err, stderr, len(exGWFlows)) } } } +func flattenFlowCacheEntries(flowCache map[string][]string) []string { + flowCount := 0 + for _, entry := range flowCache { + flowCount += len(entry) + } + flows := make([]string, 0, flowCount) + for _, entry := range flowCache { + flows = append(flows, entry...) + } + return flows +} + // since we share the host's k8s node IP, add OpenFlow flows // -- to steer the NodePort traffic arriving on the host to the OVN logical topology and // -- to also connection track the outbound north-south traffic through l3 gateway so that diff --git a/go-controller/pkg/util/ovs.go b/go-controller/pkg/util/ovs.go index b32f73999b..3c301c202d 100644 --- a/go-controller/pkg/util/ovs.go +++ b/go-controller/pkg/util/ovs.go @@ -4,6 +4,7 @@ import ( "bytes" "encoding/json" "fmt" + "io" "net" "path/filepath" "regexp" @@ -669,11 +670,69 @@ func AddOFFlowWithSpecificAction(bridgeName, action string) (string, string, err return strings.Trim(stdout.String(), "\" \n"), stderr.String(), err } +// openFlowStdinReader incrementally renders a flow slice as a newline-delimited +// stream for ovs-ofctl stdin without constructing one large joined string. +type openFlowStdinReader struct { + flows []string + flowIndex int + flowOffset int + needEOL bool +} + +// Read implements io.Reader over r.flows, producing output equivalent to +// strings.Join(flows, "\n"), but in small chunks to reduce peak allocations. 
+func (r *openFlowStdinReader) Read(p []byte) (int, error) { + if len(p) == 0 { + return 0, nil + } + // Fast path: no flows left and no pending delimiter. + if r.flowIndex >= len(r.flows) && !r.needEOL { + return 0, io.EOF + } + + total := 0 + for total < len(p) { + if r.needEOL { + // Emit exactly one '\n' between flows. + p[total] = '\n' + total++ + r.needEOL = false + if total == len(p) { + return total, nil + } + continue + } + + if r.flowIndex >= len(r.flows) { + break + } + + flow := r.flows[r.flowIndex] + if r.flowOffset >= len(flow) { + // Current flow was fully consumed; advance and schedule delimiter if + // there is another flow. + r.flowIndex++ + r.flowOffset = 0 + r.needEOL = r.flowIndex < len(r.flows) + continue + } + + // Copy as much of the current flow as fits in caller's buffer. + copied := copy(p[total:], flow[r.flowOffset:]) + total += copied + r.flowOffset += copied + } + + if total == 0 { + return 0, io.EOF + } + return total, nil +} + // ReplaceOFFlows replaces flows in the bridge with a slice of flows func ReplaceOFFlows(bridgeName string, flows []string) (string, string, error) { args := []string{"-O", "OpenFlow13", "--bundle", "replace-flows", bridgeName, "-"} - stdin := &bytes.Buffer{} - stdin.Write([]byte(strings.Join(flows, "\n"))) + stdin := &openFlowStdinReader{flows: flows} cmd := runner.exec.Command(runner.ofctlPath, args...) 
cmd.SetStdin(stdin) diff --git a/go-controller/pkg/util/ovs_unit_test.go b/go-controller/pkg/util/ovs_unit_test.go index 2b8e633949..b832c89af2 100644 --- a/go-controller/pkg/util/ovs_unit_test.go +++ b/go-controller/pkg/util/ovs_unit_test.go @@ -3,7 +3,9 @@ package util import ( "bytes" "fmt" + "io" "os" + "strings" "testing" "time" @@ -1695,14 +1697,14 @@ func TestReplaceOFFlows(t *testing.T) { expectedErr: fmt.Errorf("failed to execute ovs-ofctl command"), onRetArgsExecUtilsIface: &ovntest.TestifyMockHelper{OnCallMethodName: "RunCmd", OnCallMethodArgType: []string{"*mocks.Cmd", "string", "[]string", "string", "string", "string", "string", "string", "string"}, RetArgList: []interface{}{nil, nil, fmt.Errorf("failed to execute ovs-ofctl command")}}, onRetArgsKexecIface: &ovntest.TestifyMockHelper{OnCallMethodName: "Command", OnCallMethodArgType: []string{"string", "string", "string", "string", "string", "string", "string"}, RetArgList: []interface{}{mockCmd}}, - onRetArgsCmdList: &ovntest.TestifyMockHelper{OnCallMethodName: "SetStdin", OnCallMethodArgType: []string{"*bytes.Buffer"}}, + onRetArgsCmdList: &ovntest.TestifyMockHelper{OnCallMethodName: "SetStdin", OnCallMethodArgType: []string{"*util.openFlowStdinReader"}}, }, { desc: "positive: run `ovs-ofctl` command", expectedErr: nil, onRetArgsExecUtilsIface: &ovntest.TestifyMockHelper{OnCallMethodName: "RunCmd", OnCallMethodArgType: []string{"*mocks.Cmd", "string", "[]string", "string", "string", "string", "string", "string", "string"}, RetArgList: []interface{}{bytes.NewBuffer([]byte("testblah")), bytes.NewBuffer([]byte("")), nil}}, onRetArgsKexecIface: &ovntest.TestifyMockHelper{OnCallMethodName: "Command", OnCallMethodArgType: []string{"string", "string", "string", "string", "string", "string", "string"}, RetArgList: []interface{}{mockCmd}}, - onRetArgsCmdList: &ovntest.TestifyMockHelper{OnCallMethodName: "SetStdin", OnCallMethodArgType: []string{"*bytes.Buffer"}}, + onRetArgsCmdList: 
&ovntest.TestifyMockHelper{OnCallMethodName: "SetStdin", OnCallMethodArgType: []string{"*util.openFlowStdinReader"}},
 		},
 	}
 	for i, tc := range tests {
@@ -1722,6 +1724,44 @@ func TestReplaceOFFlows(t *testing.T) {
 	}
 }
 
+func TestOpenFlowStdinReader(t *testing.T) {
+	tests := []struct {
+		desc  string
+		flows []string
+	}{
+		{
+			desc:  "empty flow list",
+			flows: []string{},
+		},
+		{
+			desc:  "single flow",
+			flows: []string{"table=0,priority=0,actions=NORMAL"},
+		},
+		{
+			desc:  "multiple flows",
+			flows: []string{"a", "b", "c"},
+		},
+		{
+			desc:  "includes empty flow",
+			flows: []string{"a", "", "c"},
+		},
+	}
+
+	for i, tc := range tests {
+		t.Run(fmt.Sprintf("%d:%s", i, tc.desc), func(t *testing.T) {
+			r := &openFlowStdinReader{flows: tc.flows}
+			out, err := io.ReadAll(r)
+			require.NoError(t, err)
+			assert.Equal(t, strings.Join(tc.flows, "\n"), string(out))
+
+			buf := make([]byte, 1)
+			n, eof := r.Read(buf)
+			assert.Equal(t, 0, n)
+			assert.Equal(t, io.EOF, eof)
+		})
+	}
+}
+
 func TestGetOVNDBServerInfo(t *testing.T) {
 	mockKexecIface := new(mock_k8s_io_utils_exec.Interface)
 	mockExecRunner := new(mocks.ExecRunner)

From 2d3e48b739625ef23c2ebf56602385d42389755b Mon Sep 17 00:00:00 2001
From: Tim Rozet
Date: Sun, 15 Feb 2026 11:08:28 -0500
Subject: [PATCH 36/59] Adds benchmark test for OpenFlow stream reader

Includes legacy string join buffer test to show the improvement of the
new openFlowStdinReader.

go test ./pkg/util -run '^$' -bench BenchmarkReplaceOFFlowsInputRendering -benchmem

Results on my host show the cost per cycle to the heap:
- 1k_flows/join_buffer: 213043 B/op, 3 allocs/op
- 1k_flows/stream_reader: 48 B/op, 1 allocs/op
- 5k_flows/join_buffer: 1048641 B/op, 3 allocs/op
- 5k_flows/stream_reader: 48 B/op, 1 allocs/op

48 Bytes is the total size of the openFlowStdinReader, since it
references a slice it does not hold the slice itself, unlike the
previous join buffer logic.
Signed-off-by: Tim Rozet --- go-controller/pkg/util/ovs_benchmark_test.go | 82 ++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 go-controller/pkg/util/ovs_benchmark_test.go diff --git a/go-controller/pkg/util/ovs_benchmark_test.go b/go-controller/pkg/util/ovs_benchmark_test.go new file mode 100644 index 0000000000..3fb8a7b514 --- /dev/null +++ b/go-controller/pkg/util/ovs_benchmark_test.go @@ -0,0 +1,82 @@ +package util + +import ( + "bytes" + "io" + "strings" + "testing" +) + +var benchmarkFlowBytesSink int64 +var benchmarkFlowCountSink int + +func BenchmarkReplaceOFFlowsInputRendering(b *testing.B) { + benchCases := []struct { + name string + flowCount int + }{ + { + name: "1k_flows", + flowCount: 1000, + }, + { + name: "5k_flows", + flowCount: 5000, + }, + } + + for _, tc := range benchCases { + flows := makeBenchmarkFlows(tc.flowCount) + totalBytes := benchmarkFlowsBytes(flows) + + b.Run(tc.name+"/join_buffer", func(b *testing.B) { + b.ReportAllocs() + b.SetBytes(totalBytes) + for i := 0; i < b.N; i++ { + stdin := &bytes.Buffer{} + stdin.Write([]byte(strings.Join(flows, "\n"))) + written, err := io.Copy(io.Discard, stdin) + if err != nil { + b.Fatalf("failed to drain old flow payload: %v", err) + } + benchmarkFlowBytesSink = written + benchmarkFlowCountSink = stdin.Len() + } + }) + + b.Run(tc.name+"/stream_reader", func(b *testing.B) { + b.ReportAllocs() + b.SetBytes(totalBytes) + for i := 0; i < b.N; i++ { + stdin := &openFlowStdinReader{flows: flows} + written, err := io.Copy(io.Discard, stdin) + if err != nil { + b.Fatalf("failed to drain streaming flow payload: %v", err) + } + benchmarkFlowBytesSink = written + benchmarkFlowCountSink = len(flows) + } + }) + } +} + +func makeBenchmarkFlows(flowCount int) []string { + flows := make([]string, flowCount) + // Keep each flow moderately long to emulate real replace-flows payload size. 
+ const flowSuffix = ",ip,nw_src=10.128.0.0/14,tp_dst=8080,actions=ct(commit),output:2" + for i := 0; i < flowCount; i++ { + flows[i] = "table=0,priority=100,in_port=1,reg0=0x1" + flowSuffix + } + return flows +} + +func benchmarkFlowsBytes(flows []string) int64 { + if len(flows) == 0 { + return 0 + } + total := len(flows) - 1 + for _, flow := range flows { + total += len(flow) + } + return int64(total) +} From 2b1f4a849975efd23ebc7a585687156fc6924028 Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Wed, 4 Feb 2026 17:54:15 -0500 Subject: [PATCH 37/59] e2e: clean up stale networks before VRF-Lite BGP tests When a VRF-Lite test times out during cleanup, Docker networks may be left behind. On retry, new networks with the same subnets fail to create. Add ListNetworks() to the infraprovider API and use it to find and delete any stale networks with matching subnets before creating new ones in BeforeEach. Signed-off-by: Ihar Hrachyshka Assisted-by: opus (claude-opus-4-5-20251101) --- test/e2e/infraprovider/api/api.go | 2 ++ test/e2e/infraprovider/providers/kind/kind.go | 26 +++++++++++--- test/e2e/route_advertisements.go | 36 +++++++++++++++---- 3 files changed, 54 insertions(+), 10 deletions(-) diff --git a/test/e2e/infraprovider/api/api.go b/test/e2e/infraprovider/api/api.go index 2a38ef6595..99030444c2 100644 --- a/test/e2e/infraprovider/api/api.go +++ b/test/e2e/infraprovider/api/api.go @@ -18,6 +18,8 @@ type Provider interface { // PrimaryNetwork returns OVN-Kubernetes primary infrastructure network information PrimaryNetwork() (Network, error) + // ListNetworks returns the names of all networks + ListNetworks() ([]string, error) // GetNetwork returns a network GetNetwork(name string) (Network, error) // GetExternalContainerNetworkInterface fetches network interface information from the external container attached to a specific network diff --git a/test/e2e/infraprovider/providers/kind/kind.go b/test/e2e/infraprovider/providers/kind/kind.go index 
14e850e496..31c28a04c5 100644 --- a/test/e2e/infraprovider/providers/kind/kind.go +++ b/test/e2e/infraprovider/providers/kind/kind.go @@ -68,6 +68,10 @@ func (k *kind) GetNetwork(name string) (api.Network, error) { return getNetwork(name) } +func (k *kind) ListNetworks() ([]string, error) { + return listNetworks() +} + func (k *kind) GetExternalContainerNetworkInterface(container api.ExternalContainer, network api.Network) (api.NetworkInterface, error) { return getNetworkInterface(container.Name, network.Name()) } @@ -629,13 +633,27 @@ func doesContainerNameExist(name string) (bool, error) { return state != "", nil } +func listNetworks() ([]string, error) { + output, err := exec.Command(containerengine.Get().String(), "network", "ls", "--format", nameFormat).CombinedOutput() + if err != nil { + return nil, fmt.Errorf("failed to list networks: %w", err) + } + var networks []string + for _, name := range strings.Split(strings.TrimSpace(string(output)), "\n") { + if name != "" { + networks = append(networks, name) + } + } + return networks, nil +} + func doesNetworkExist(networkName string) (bool, error) { - dataBytes, err := exec.Command(containerengine.Get().String(), "network", "ls", "--format", nameFormat).CombinedOutput() + networks, err := listNetworks() if err != nil { - return false, fmt.Errorf("failed to list networks: %w", err) + return false, err } - for _, existingNetworkName := range strings.Split(strings.Trim(string(dataBytes), "\n"), "\n") { - if existingNetworkName == networkName { + for _, name := range networks { + if name == networkName { return true, nil } } diff --git a/test/e2e/route_advertisements.go b/test/e2e/route_advertisements.go index b1cb225e52..11826272e8 100644 --- a/test/e2e/route_advertisements.go +++ b/test/e2e/route_advertisements.go @@ -1925,6 +1925,19 @@ var _ = ginkgo.Describe("BGP: For a VRF-Lite configured network", feature.RouteA // isolation doesn't cut it. macvlan driver might be a better option. 
bgpServerSubnetIPv4 = "172.38.0.0/16" bgpServerSubnetIPv6 = "fc00:f853:ccd:38::/64" + // Additional subnets used in nested "When there is other network" tests + otherBGPPeerSubnetIPv4 = "172.136.0.0/16" + otherBGPPeerSubnetIPv6 = "fc00:f853:ccd:136::/64" + otherBGPServerSubnetIPv4 = "172.138.0.0/16" + otherBGPServerSubnetIPv6 = "fc00:f853:ccd:138::/64" + ) + + // staleSubnets lists all subnets that may be left behind if a test times out during cleanup. + staleSubnets := sets.New( + bgpPeerSubnetIPv4, bgpPeerSubnetIPv6, + bgpServerSubnetIPv4, bgpServerSubnetIPv6, + otherBGPPeerSubnetIPv4, otherBGPPeerSubnetIPv6, + otherBGPServerSubnetIPv4, otherBGPServerSubnetIPv6, ) f := wrappedTestFramework(baseName) @@ -1944,6 +1957,21 @@ var _ = ginkgo.Describe("BGP: For a VRF-Lite configured network", feature.RouteA testNetworkName = testBaseName bgpServerName = testNetworkName + "-bgpserver" + // Clean up any stale networks from previous test attempts that may have failed during cleanup. + networkNames, err := infraprovider.Get().ListNetworks() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + for _, name := range networkNames { + network, err := infraprovider.Get().GetNetwork(name) + if err != nil { + continue + } + v4, v6, _ := network.IPv4IPv6Subnets() + if staleSubnets.Has(v4) || staleSubnets.Has(v6) { + framework.Logf("Cleaning up stale network %q with subnets %s/%s", name, v4, v6) + gomega.Expect(ictx.DeleteNetwork(network)).To(gomega.Succeed()) + } + } + // we will create a agnhost server on an extra network peered with BGP ginkgo.By("Running a BGP network with an agnhost server") bgpPeerCIDRs := []string{bgpPeerSubnetIPv4, bgpPeerSubnetIPv6} @@ -2279,12 +2307,8 @@ var _ = ginkgo.Describe("BGP: For a VRF-Lite configured network", feature.RouteA ginkgo.Describe("When there is other network", func() { const ( - otherBGPPeerSubnetIPv4 = "172.136.0.0/16" - otherBGPPeerSubnetIPv6 = "fc00:f853:ccd:136::/64" - otherBGPServerSubnetIPv4 = "172.138.0.0/16" - 
otherBGPServerSubnetIPv6 = "fc00:f853:ccd:138::/64"
-			otherUDNCIDRv4           = "103.203.0.0/16"
-			otherUDNCIDRv6           = "2014:200:200::0/60"
+			otherUDNCIDRv4 = "103.203.0.0/16"
+			otherUDNCIDRv6 = "2014:200:200::0/60"
 		)
 
 		var (

From 3b98daec90ff1357b24343f19a68bd9b865cb0ff Mon Sep 17 00:00:00 2001
From: Tim Rozet
Date: Fri, 13 Feb 2026 08:51:19 -0500
Subject: [PATCH 38/59] Fixes race with egress ip + node ip mgr

There is a race where:
1. cluster manager assigns an egress IP to a node
2. the node updates its secondary-host-egress-ips or bridge-egress-ips
annotations
3. the node assigns the IP to the interface
4. node ip manager sees the netlink event, then checks if the IP is in
the annotations from step 2
5. If the informer cache is not updated yet then ip manager will see the
IP is not in the annotation and misinterpret it as a host-cidr.
6. The host is unlabeled as egress ip assignable.
7. Egress IP cluster manager when it goes to reassign the IP, checks the
host-cidrs annotation to make sure the IP is not a host IP:
isEgressIPAddrConflict and fails.

This fixes it by narrowing the gap by detecting when the annotation
changes, then signaling node ip mgr to sync.

Additionally, do not guard deletion on if it is a host cidr or not. The
IP shouldn't exist anyway in the host-cidrs, so it is safe to delete and
prevents a stale state where the wrong IP persists in the annotation.
Fixes: #5966 Signed-off-by: Tim Rozet --- .../pkg/node/node_ip_handler_linux.go | 46 ++++++++++++++---- .../pkg/node/node_ip_handler_linux_test.go | 48 +++++++++++++++++++ 2 files changed, 85 insertions(+), 9 deletions(-) diff --git a/go-controller/pkg/node/node_ip_handler_linux.go b/go-controller/pkg/node/node_ip_handler_linux.go index dda4e69da0..d46c758780 100644 --- a/go-controller/pkg/node/node_ip_handler_linux.go +++ b/go-controller/pkg/node/node_ip_handler_linux.go @@ -12,6 +12,7 @@ import ( "github.com/vishvananda/netlink" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/tools/cache" "k8s.io/klog/v2" @@ -96,10 +97,10 @@ func (c *addressManager) addAddr(ipnet net.IPNet, linkIndex int) bool { // removes IP from address manager // returns true if there was an update -func (c *addressManager) delAddr(ipnet net.IPNet, linkIndex int) bool { +func (c *addressManager) delAddr(ipnet net.IPNet) bool { c.Lock() defer c.Unlock() - if c.cidrs.Has(ipnet.String()) && c.isValidNodeIP(ipnet.IP, linkIndex) { + if c.cidrs.Has(ipnet.String()) { klog.Infof("Removing IP: %s, from node IP manager", ipnet) c.cidrs.Delete(ipnet.String()) return true @@ -134,7 +135,7 @@ func (c *addressManager) Run(stopChan <-chan struct{}, doneWg *sync.WaitGroup) { return } - c.addHandlerForPrimaryAddrChange() + c.addHandlerForAddrChange() doneWg.Add(1) go func() { c.runInternal(stopChan, c.getNetlinkAddrSubFunc(stopChan)) @@ -172,7 +173,7 @@ func (c *addressManager) runInternal(stopChan <-chan struct{}, subscribe subscri if a.NewAddr { addrChanged = c.addAddr(a.LinkAddress, a.LinkIndex) } else { - addrChanged = c.delAddr(a.LinkAddress, a.LinkIndex) + addrChanged = c.delAddr(a.LinkAddress) } c.handleNodePrimaryAddrChange() @@ -218,14 +219,24 @@ func (c *addressManager) getNetlinkAddrSubFunc(stopChan <-chan struct{}) func() } } -// addHandlerForPrimaryAddrChange handles reconfiguration of a node primary IP address change -func (c *addressManager) 
addHandlerForPrimaryAddrChange() { +// addHandlerForAddrChange handles reconfiguration of a node primary IP address change or egress IP annotation changes +func (c *addressManager) addHandlerForAddrChange() { // Add an event handler to the node informer. This is needed for cases where users first update the node's IP // address but only later update kubelet configuration and restart kubelet (which in turn will update the reported // IP address inside the node's status field). + // It is also needed to cover gaps when the egress IPs are updated in annotations, in order to + // maintain a consistent host-cidrs set, without stale Egress IPs. nodeInformer := c.watchFactory.NodeInformer() _, err := nodeInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ - UpdateFunc: func(_, _ interface{}) { + UpdateFunc: func(oldObj, newObj interface{}) { + oldNode, oldOK := oldObj.(*corev1.Node) + newNode, newOK := newObj.(*corev1.Node) + if oldOK && newOK && newNode.Name == c.nodeName && nodeEgressIPAnnotationsChanged(oldNode, newNode) { + klog.V(5).Infof("Node %s egress IP annotations changed, syncing node IP manager", c.nodeName) + c.sync() + // c.sync() already calls c.handleNodePrimaryAddrChange, so safe to return + return + } c.handleNodePrimaryAddrChange() }, }) @@ -234,6 +245,20 @@ func (c *addressManager) addHandlerForPrimaryAddrChange() { } } +func nodeEgressIPAnnotationsChanged(oldNode, newNode *corev1.Node) bool { + if oldNode == nil || newNode == nil { + return false + } + for _, key := range []string{util.OVNNodeSecondaryHostEgressIPs, util.OVNNodeBridgeEgressIPs} { + oldVal, oldSet := oldNode.Annotations[key] + newVal, newSet := newNode.Annotations[key] + if oldSet != newSet || oldVal != newVal { + return true + } + } + return false +} + // updates OVN's EncapIP if the node IP changed func (c *addressManager) handleNodePrimaryAddrChange() { c.Lock() @@ -381,8 +406,11 @@ func (c *addressManager) nodePrimaryAddrChanged() (bool, error) { return true, nil } -// 
detects if the IP is valid for a node
-// excludes things like local IPs, mgmt port ip, special masquerade IP and Egress IPs for non-ovs type interfaces
+// isValidNodeIP detects if the IP is valid for a node.
+// It excludes things like local IPs, mgmt port ip, special masquerade IP and Egress IPs
+// for non-ovs type interfaces.
+// Note, it is possible that the node annotations may not be up to date when this check is executed.
+// For this reason, sync is triggered on annotation change via addHandlerForAddrChange.
 func (c *addressManager) isValidNodeIP(addr net.IP, linkIndex int) bool {
 	if addr == nil {
 		return false
diff --git a/go-controller/pkg/node/node_ip_handler_linux_test.go b/go-controller/pkg/node/node_ip_handler_linux_test.go
index c78307cca1..fd549ed7df 100644
--- a/go-controller/pkg/node/node_ip_handler_linux_test.go
+++ b/go-controller/pkg/node/node_ip_handler_linux_test.go
@@ -170,6 +170,54 @@ var _ = Describe("Node IP Handler event tests", func() {
 	})
 })
 
+var _ = Describe("Node IP Handler helper tests", func() {
+	const nodeName = "node1"
+
+	It("removes cached IPs even when they are no longer valid node IPs", func() {
+		Expect(config.PrepareTestConfig()).To(Succeed())
+		tc := configureKubeOVNContext(nodeName, false)
+		defer tc.watchFactory.Shutdown()
+
+		tc.ipManager.Lock()
+		tc.ipManager.cidrs.Insert(tc.mgmtPortIP4.String())
+		tc.ipManager.Unlock()
+
+		Expect(tc.ipManager.delAddr(*tc.mgmtPortIP4)).To(BeTrue())
+		_, networks := tc.ipManager.ListAddresses()
+		Expect(networks).To(BeEmpty())
+	})
+
+	It("syncs stale host-cidrs when egress IP annotations change", func() {
+		Expect(config.PrepareTestConfig()).To(Succeed())
+		tc := configureKubeOVNContext(nodeName, false)
+		defer tc.watchFactory.Shutdown()
+
+		tc.ipManager.addHandlerForAddrChange()
+
+		staleEIP := "2001:db8:abcd:1234:c001::"
+		node, err := tc.fakeClient.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
+		Expect(err).NotTo(HaveOccurred())
+
+		nodeToUpdate := 
node.DeepCopy() + nodeToUpdate.Annotations[util.OVNNodeHostCIDRs] = fmt.Sprintf("[\"%s\", \"%s\", \"%s/128\"]", "10.1.1.10/24", "2001:db8::10/64", staleEIP) + nodeToUpdate.Annotations[util.OVNNodeSecondaryHostEgressIPs] = fmt.Sprintf("[\"%s\"]", staleEIP) + _, err = tc.fakeClient.CoreV1().Nodes().Update(context.TODO(), nodeToUpdate, metav1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + Eventually(func() bool { + updatedNode, err := tc.fakeClient.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) + if err != nil { + return false + } + hostIPs, err := util.ParseNodeHostCIDRsDropNetMask(updatedNode) + if err != nil { + return false + } + return !hostIPs.Has(staleEIP) + }, 5).Should(BeTrue()) + }) +}) + var _ = Describe("Node IP Handler tests", func() { // To ensure that variables don't leak between parallel Ginkgo specs, // put all test context into a single struct and reference it via From 7feb1238daeb88bbcb60ae748304a45e861d914e Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Fri, 13 Feb 2026 09:08:40 -0500 Subject: [PATCH 39/59] Continue trying to assign egressIP on conflict In cluster manager, egress IP was halting and returning error if it detected an egress IP conflicted with a host IP. Change this to continue to try to see if the next EgressIP will work. Keep posting warnings/events about conflicting IPs though. 
Signed-off-by: Tim Rozet --- .../pkg/clustermanager/egressip_controller.go | 4 ++-- .../egressip_controller_test.go | 19 +++++++++++++++---- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/go-controller/pkg/clustermanager/egressip_controller.go b/go-controller/pkg/clustermanager/egressip_controller.go index ded7851375..891ec2c0ce 100644 --- a/go-controller/pkg/clustermanager/egressip_controller.go +++ b/go-controller/pkg/clustermanager/egressip_controller.go @@ -1242,7 +1242,7 @@ func (eIPC *egressIPClusterController) assignEgressIPs(name string, egressIPs [] eIPC.recorder.Eventf(&eIPRef, corev1.EventTypeWarning, "EgressIPConflict", "Egress IP %s with IP "+ "%v is conflicting with a host (%s) IP address and will not be assigned", name, eIP, conflictedHost) klog.Errorf("Egress IP: %v address is already assigned on an interface on node %s", eIP, conflictedHost) - return assignments + continue } if status, exists := existingAllocations[eIP.String()]; exists { // On public clouds we will re-process assignments for the same IP @@ -1294,7 +1294,7 @@ func (eIPC *egressIPClusterController) assignEgressIPs(name string, egressIPs [] "IP: %q for EgressIP: %s is already allocated for EgressIP: %s on %s", egressIP, name, status.Name, status.Node, ) klog.Errorf("IP: %q for EgressIP: %s is already allocated for EgressIP: %s on %s", egressIP, name, status.Name, status.Node) - return assignments + continue } } // Egress IP for secondary host networks is only available on baremetal environments diff --git a/go-controller/pkg/clustermanager/egressip_controller_test.go b/go-controller/pkg/clustermanager/egressip_controller_test.go index 593671f7b8..95d06bc0b1 100644 --- a/go-controller/pkg/clustermanager/egressip_controller_test.go +++ b/go-controller/pkg/clustermanager/egressip_controller_test.go @@ -1367,12 +1367,23 @@ var _ = ginkgo.Describe("OVN cluster-manager EgressIP Operations", func() { I0212 20:22:37.643187 1837759 egressip_controller.go:1173] Current 
assignments are: map[] I0212 20:22:37.643205 1837759 egressip_controller.go:1175] Will attempt assignment for egress IP: 192.168.126.51 E0212 20:22:37.643254 1837759 egressip_controller.go:1190] Egress IP: 192.168.126.51 address is already assigned on an interface on node node2*/ - gomega.Eventually(fakeClusterManagerOVN.fakeRecorder.Events).Should(gomega.HaveLen(4)) - for i := 0; i < 4; i++ { + gomega.Eventually(fakeClusterManagerOVN.fakeRecorder.Events).Should(gomega.HaveLen(8)) + conflictCount := 0 + noMatchingCount := 0 + for i := 0; i < 8; i++ { recordedEvent := <-fakeClusterManagerOVN.fakeRecorder.Events - gomega.Expect(recordedEvent).To(gomega.ContainSubstring( - "EgressIPConflict Egress IP egressip with IP 192.168.126.51 is conflicting with a host (node2) IP address and will not be assigned")) + gomega.Expect(recordedEvent).To(gomega.SatisfyAny( + gomega.ContainSubstring("EgressIPConflict Egress IP egressip with IP 192.168.126.51 is conflicting with a host (node2) IP address and will not be assigned"), + gomega.ContainSubstring("NoMatchingNodeFound No matching nodes found, which can host any of the egress IPs: [192.168.126.51] for object EgressIP: egressip"))) + if strings.Contains(recordedEvent, "EgressIPConflict") { + conflictCount++ + } + if strings.Contains(recordedEvent, "NoMatchingNodeFound") { + noMatchingCount++ + } } + gomega.Expect(conflictCount).To(gomega.Equal(4)) + gomega.Expect(noMatchingCount).To(gomega.Equal(4)) return nil } From fd2701582dca1961435745697b018e636e045dfe Mon Sep 17 00:00:00 2001 From: Periyasamy Palanisamy Date: Wed, 11 Feb 2026 18:02:04 +0100 Subject: [PATCH 40/59] Minimize ACLs by combining ipBlocks into single ACL Combine all ipBlocks in a NetworkPolicy rule into single ACL instead of creating one ACL per ipBlock. This reduces ACL bloat when policy having multiple ipBlocks for ingress/egress rules. 
Signed-off-by: Periyasamy Palanisamy --- .../pkg/libovsdb/ops/db_object_types.go | 5 +- go-controller/pkg/ovn/gress_policy.go | 48 +++-- go-controller/pkg/ovn/gress_policy_test.go | 19 +- go-controller/pkg/ovn/policy_test.go | 182 +++++++++++++++++- 4 files changed, 212 insertions(+), 42 deletions(-) diff --git a/go-controller/pkg/libovsdb/ops/db_object_types.go b/go-controller/pkg/libovsdb/ops/db_object_types.go index 0a31560d1f..375f845ef8 100644 --- a/go-controller/pkg/libovsdb/ops/db_object_types.go +++ b/go-controller/pkg/libovsdb/ops/db_object_types.go @@ -250,9 +250,10 @@ var ACLNetworkPolicyPortIndex = newObjectIDsType(acl, NetworkPolicyPortIndexOwne // ingress/egress + NetworkPolicy[In/E]gressRule idx - defines given gressPolicy. // ACLs are created for gp.portPolicies which are grouped by protocol: // - for empty policy (no selectors and no ip blocks) - empty ACL (see allIPsMatch) +// with idx=emptyIdx (-1) // OR -// - all selector-based peers ACL -// - for every IPBlock +1 ACL +// - all selector-based peers ACL with idx=emptyIdx (-1) +// - all ipBlocks combined into a single ACL with idx=ipBlockCombinedIdx (-2) // Therefore unique id for a given gressPolicy is protocol name + IPBlock idx // (protocol will be "None" if no port policy is defined, and empty policy and all // selector-based peers ACLs will have idx=-1) diff --git a/go-controller/pkg/ovn/gress_policy.go b/go-controller/pkg/ovn/gress_policy.go index ad20fadfb3..b1f844123c 100644 --- a/go-controller/pkg/ovn/gress_policy.go +++ b/go-controller/pkg/ovn/gress_policy.go @@ -22,6 +22,11 @@ import ( const ( // emptyIdx is used to create ACL for gressPolicy that doesn't have ipBlocks emptyIdx = -1 + // ipBlockCombinedIdx is used when creating an ACL for a gressPolicy + // that contains ipBlocks. Previously, one ACL was created per ipBlock. + // This is changed to create a single combined ACL for all ipBlocks, + // and this special index value identifies those new ACLs. 
+ ipBlockCombinedIdx = -2 ) type gressPolicy struct { @@ -167,14 +172,14 @@ func (gp *gressPolicy) allIPsMatch() string { } } -func (gp *gressPolicy) getMatchFromIPBlock(lportMatch, l4Match string) []string { +func (gp *gressPolicy) getMatchFromIPBlock(lportMatch, l4Match string) string { var direction string if gp.policyType == knet.PolicyTypeIngress { direction = "src" } else { direction = "dst" } - var matchStrings []string + var ipBlockMatches []string var matchStr, ipVersion string for _, ipBlock := range gp.ipBlocks { if utilnet.IsIPv6CIDRString(ipBlock.CIDR) { @@ -185,17 +190,22 @@ func (gp *gressPolicy) getMatchFromIPBlock(lportMatch, l4Match string) []string if len(ipBlock.Except) == 0 { matchStr = fmt.Sprintf("%s.%s == %s", ipVersion, direction, ipBlock.CIDR) } else { - matchStr = fmt.Sprintf("%s.%s == %s && %s.%s != {%s}", ipVersion, direction, ipBlock.CIDR, + matchStr = fmt.Sprintf("(%s.%s == %s && %s.%s != {%s})", ipVersion, direction, ipBlock.CIDR, ipVersion, direction, strings.Join(ipBlock.Except, ", ")) } - if l4Match == libovsdbutil.UnspecifiedL4Match { - matchStr = fmt.Sprintf("%s && %s", matchStr, lportMatch) - } else { - matchStr = fmt.Sprintf("%s && %s && %s", matchStr, l4Match, lportMatch) - } - matchStrings = append(matchStrings, matchStr) + ipBlockMatches = append(ipBlockMatches, matchStr) } - return matchStrings + var l3Match string + if len(ipBlockMatches) == 1 { + l3Match = ipBlockMatches[0] + } else { + l3Match = fmt.Sprintf("(%s)", strings.Join(ipBlockMatches, " || ")) + } + + if l4Match == libovsdbutil.UnspecifiedL4Match { + return fmt.Sprintf("%s && %s", l3Match, lportMatch) + } + return fmt.Sprintf("%s && %s && %s", l3Match, l4Match, lportMatch) } // addNamespaceAddressSet adds a namespace address set to the gress policy. 
@@ -285,13 +295,11 @@ func (gp *gressPolicy) buildLocalPodACLs(portGroupName string, aclLogging *libov for protocol, l4Match := range libovsdbutil.GetL4MatchesFromNetworkPolicyPorts(gp.portPolicies) { if len(gp.ipBlocks) > 0 { // Add ACL allow rule for IPBlock CIDR - ipBlockMatches := gp.getMatchFromIPBlock(lportMatch, l4Match) - for ipBlockIdx, ipBlockMatch := range ipBlockMatches { - aclIDs := gp.getNetpolACLDbIDs(ipBlockIdx, protocol) - acl := libovsdbutil.BuildACLWithDefaultTier(aclIDs, types.DefaultAllowPriority, ipBlockMatch, action, - aclLogging, gp.aclPipeline) - createdACLs = append(createdACLs, acl) - } + ipBlockMatch := gp.getMatchFromIPBlock(lportMatch, l4Match) + aclIDs := gp.getNetpolACLDbIDs(ipBlockCombinedIdx, protocol) + acl := libovsdbutil.BuildACLWithDefaultTier(aclIDs, types.DefaultAllowPriority, ipBlockMatch, action, + aclLogging, gp.aclPipeline) + createdACLs = append(createdACLs, acl) } // if there are pod/namespace selector, then allow packets from/to that address_set or // if the NetworkPolicyPeer is empty, then allow from all sources or to all destinations. 
@@ -334,10 +342,10 @@ func (gp *gressPolicy) getNetpolACLDbIDs(ipBlockIdx int, protocol string) *libov // gress rule index libovsdbops.GressIdxKey: strconv.Itoa(gp.idx), // acls are created for every gp.portPolicies which are grouped by protocol: - // - for empty policy (no selectors and no ip blocks) - empty ACL + // - for empty policy (no selectors and no ip blocks) - empty ACL with idx=emptyIdx (-1) // OR - // - all selector-based peers ACL - // - for every IPBlock +1 ACL + // - all selector-based peers ACL with idx=emptyIdx (-1) + // - all ipBlocks combined into a single ACL with idx=ipBlockCombinedIdx (-2) // Therefore unique id for a given gressPolicy is protocol name + IPBlock idx // (protocol will be "None" if no port policy is defined, and empty policy and all // selector-based peers ACLs will have idx=-1) diff --git a/go-controller/pkg/ovn/gress_policy_test.go b/go-controller/pkg/ovn/gress_policy_test.go index 14b2a65a7c..f45be5385a 100644 --- a/go-controller/pkg/ovn/gress_policy_test.go +++ b/go-controller/pkg/ovn/gress_policy_test.go @@ -16,7 +16,7 @@ func TestGetMatchFromIPBlock(t *testing.T) { ipBlocks []*knet.IPBlock lportMatch string l4Match string - expected []string + expected string }{ { desc: "IPv4 only no except", @@ -27,7 +27,7 @@ func TestGetMatchFromIPBlock(t *testing.T) { }, lportMatch: "fake", l4Match: "input", - expected: []string{"ip4.src == 0.0.0.0/0 && input && fake"}, + expected: "ip4.src == 0.0.0.0/0 && input && fake", }, { desc: "multiple IPv4 only no except", @@ -41,8 +41,7 @@ func TestGetMatchFromIPBlock(t *testing.T) { }, lportMatch: "fake", l4Match: "input", - expected: []string{"ip4.src == 0.0.0.0/0 && input && fake", - "ip4.src == 10.1.0.0/16 && input && fake"}, + expected: "(ip4.src == 0.0.0.0/0 || ip4.src == 10.1.0.0/16) && input && fake", }, { desc: "IPv6 only no except", @@ -53,7 +52,7 @@ func TestGetMatchFromIPBlock(t *testing.T) { }, lportMatch: "fake", l4Match: "input", - expected: []string{"ip6.src == 
fd00:10:244:3::49/32 && input && fake"}, + expected: "ip6.src == fd00:10:244:3::49/32 && input && fake", }, { desc: "mixed IPv4 and IPv6 no except", @@ -67,8 +66,7 @@ func TestGetMatchFromIPBlock(t *testing.T) { }, lportMatch: "fake", l4Match: "input", - expected: []string{"ip6.src == ::/0 && input && fake", - "ip4.src == 0.0.0.0/0 && input && fake"}, + expected: "(ip6.src == ::/0 || ip4.src == 0.0.0.0/0) && input && fake", }, { desc: "IPv4 only with except", @@ -80,7 +78,7 @@ func TestGetMatchFromIPBlock(t *testing.T) { }, lportMatch: "fake", l4Match: "input", - expected: []string{"ip4.src == 0.0.0.0/0 && ip4.src != {10.1.0.0/16} && input && fake"}, + expected: "(ip4.src == 0.0.0.0/0 && ip4.src != {10.1.0.0/16}) && input && fake", }, { desc: "multiple IPv4 with except", @@ -95,8 +93,7 @@ func TestGetMatchFromIPBlock(t *testing.T) { }, lportMatch: "fake", l4Match: "input", - expected: []string{"ip4.src == 0.0.0.0/0 && ip4.src != {10.1.0.0/16} && input && fake", - "ip4.src == 10.1.0.0/16 && input && fake"}, + expected: "((ip4.src == 0.0.0.0/0 && ip4.src != {10.1.0.0/16}) || ip4.src == 10.1.0.0/16) && input && fake", }, { desc: "IPv4 with IPv4 except", @@ -108,7 +105,7 @@ func TestGetMatchFromIPBlock(t *testing.T) { }, lportMatch: "fake", l4Match: "input", - expected: []string{"ip4.src == 0.0.0.0/0 && ip4.src != {10.1.0.0/16} && input && fake"}, + expected: "(ip4.src == 0.0.0.0/0 && ip4.src != {10.1.0.0/16}) && input && fake", }, } diff --git a/go-controller/pkg/ovn/policy_test.go b/go-controller/pkg/ovn/policy_test.go index c02af4575f..af7923a5cf 100644 --- a/go-controller/pkg/ovn/policy_test.go +++ b/go-controller/pkg/ovn/policy_test.go @@ -6,6 +6,7 @@ import ( "net" "runtime" "sort" + "strings" "time" "github.com/onsi/ginkgo/v2" @@ -291,10 +292,24 @@ func getGressACLs(gressIdx int, peers []knet.NetworkPolicyPeer, policyType knet. 
acl.UUID = dbIDs.String() + "-UUID" acls = append(acls, acl) } - for i, ipBlock := range ipBlocks { - match := fmt.Sprintf("ip4.%s == %s && %s == @%s", ipDir, ipBlock, portDir, pgName) + if len(ipBlocks) > 0 { + var ipBlockMatches []string + for _, ipBlock := range ipBlocks { + ipVersion := "ip4" + if utilnet.IsIPv6CIDRString(ipBlock) { + ipVersion = "ip6" + } + ipBlockMatches = append(ipBlockMatches, fmt.Sprintf("%s.%s == %s", ipVersion, ipDir, ipBlock)) + } + var match string + if len(ipBlockMatches) == 1 { + match = ipBlockMatches[0] + } else { + match = fmt.Sprintf("(%s)", strings.Join(ipBlockMatches, " || ")) + } + match = fmt.Sprintf("%s && %s == @%s", match, portDir, pgName) action := allowAction(params.statelessNetPol) - dbIDs := gp.getNetpolACLDbIDs(i, libovsdbutil.UnspecifiedL4Protocol) + dbIDs := gp.getNetpolACLDbIDs(ipBlockCombinedIdx, libovsdbutil.UnspecifiedL4Protocol) acl := libovsdbops.BuildACL( libovsdbutil.GetACLName(dbIDs), direction, @@ -361,6 +376,17 @@ func getPolicyData(params *netpolDataParams) []libovsdbtest.TestData { acls = append(acls, getGressACLs(i, egress.To, knet.PolicyTypeEgress, params)...) 
} + pg := getPolicyPortGroup(params, acls) + + data := []libovsdbtest.TestData{} + for _, acl := range acls { + data = append(data, acl) + } + data = append(data, pg) + return data +} + +func getPolicyPortGroup(params *netpolDataParams, acls []*nbdb.ACL) *nbdb.PortGroup { lsps := []*nbdb.LogicalSwitchPort{} for _, uuid := range params.localPortUUIDs { lsps = append(lsps, &nbdb.LogicalSwitchPort{UUID: uuid}) @@ -375,12 +401,7 @@ func getPolicyData(params *netpolDataParams) []libovsdbtest.TestData { ) pg.UUID = pg.Name + "-UUID" - data := []libovsdbtest.TestData{} - for _, acl := range acls { - data = append(data, acl) - } - data = append(data, pg) - return data + return pg } func newNetpolDataParams(networkPolicy *knet.NetworkPolicy) *netpolDataParams { @@ -956,6 +977,149 @@ var _ = ginkgo.Describe("OVN NetworkPolicy Operations", func() { } gomega.Expect(app.Run([]string{app.Name})).To(gomega.Succeed()) }) + + ginkgo.It("reconciles existing networkPolicies with has legacy ipBlock ACLs", func() { + app.Action = func(*cli.Context) error { + namespace1 := *newNamespace(namespaceName1) + namespace1AddressSetv4, _ := buildNamespaceAddressSets(namespace1.Name, nil) + peer := knet.NetworkPolicyPeer{ + IPBlock: &knet.IPBlock{ + CIDR: "1.1.1.1", + }, + } + // equivalent rules in one peer + networkPolicy1 := newNetworkPolicy(netPolicyName1, namespace1.Name, metav1.LabelSelector{}, + []knet.NetworkPolicyIngressRule{{ + From: []knet.NetworkPolicyPeer{peer, peer}, + }}, nil) + // equivalent rules in different peers + networkPolicy2 := newNetworkPolicy(netPolicyName2, namespace1.Name, metav1.LabelSelector{}, + []knet.NetworkPolicyIngressRule{ + { + From: []knet.NetworkPolicyPeer{peer}, + }, + { + From: []knet.NetworkPolicyPeer{peer}, + }, + }, nil) + initialData := initialDB.NBData + initialData = append(initialData, namespace1AddressSetv4) + defaultDenyExpectedData := getDefaultDenyDataMultiplePolicies([]*knet.NetworkPolicy{networkPolicy1, networkPolicy2}) + initialData = 
append(initialData, defaultDenyExpectedData...) + + // NetworkPolicy 1 contains a single gress policy that previously + // created one legacy ACL per ipBlock. Simulate two legacy ACLs + // corresponding to ipBlock indexes 0 and 1 of the gress policy. + // ACL1 => libovsdbops.GressIdxKey: 0, libovsdbops.IpBlockIndexKey: 0 + // ACL2 => libovsdbops.GressIdxKey: 0, libovsdbops.IpBlockIndexKey: 1 + netInfo := &util.DefaultNetInfo{} + fakeController := getFakeBaseController(netInfo) + controllerName := getNetworkControllerName(netInfo.GetNetworkName()) + pgName1 := fakeController.getNetworkPolicyPGName(namespace1.Name, networkPolicy1.Name) + gp1 := gressPolicy{ + policyNamespace: networkPolicy1.Namespace, + policyName: networkPolicy1.Name, + policyType: knet.PolicyTypeIngress, + idx: 0, + controllerName: controllerName, + } + var legacyACLPolicy1 []*nbdb.ACL + for idx := 0; idx < 2; idx++ { + legacyACLIDs := gp1.getNetpolACLDbIDs(idx, libovsdbutil.UnspecifiedL4Protocol) + legacyACL := libovsdbops.BuildACL( + libovsdbutil.GetACLName(legacyACLIDs), + nbdb.ACLDirectionToLport, + types.DefaultAllowPriority, + fmt.Sprintf("ip4.src == 1.1.1.1 && outport == @%s", pgName1), + nbdb.ACLActionAllowRelated, + types.OvnACLLoggingMeter, + "", + false, + legacyACLIDs.GetExternalIDs(), + nil, + types.DefaultACLTier, + ) + legacyACL.UUID = legacyACLIDs.String() + "-UUID" + initialData = append(initialData, legacyACL) + legacyACLPolicy1 = append(legacyACLPolicy1, legacyACL) + } + pgNetworkPolicy1 := getPolicyPortGroup(newNetpolDataParams(networkPolicy1), legacyACLPolicy1) + initialData = append(initialData, pgNetworkPolicy1) + + // NetworkPolicy 2 contains two gress policies, each with one legacy + // ACL per ipBlock. Simulate two legacy ACL corresponding to gress + // policy indexes 0 and 1, respectively. 
+ // ACL1 => libovsdbops.GressIdxKey: 0, libovsdbops.IpBlockIndexKey: 0 + // ACL2 => libovsdbops.GressIdxKey: 1, libovsdbops.IpBlockIndexKey: 0 + pgName2 := fakeController.getNetworkPolicyPGName(namespace1.Name, networkPolicy2.Name) + firstgp2 := gressPolicy{ + policyNamespace: networkPolicy2.Namespace, + policyName: networkPolicy2.Name, + policyType: knet.PolicyTypeIngress, + idx: 0, + controllerName: controllerName, + } + secondgp2 := gressPolicy{ + policyNamespace: networkPolicy2.Namespace, + policyName: networkPolicy2.Name, + policyType: knet.PolicyTypeIngress, + idx: 1, + controllerName: controllerName, + } + legacyACLID := firstgp2.getNetpolACLDbIDs(0, libovsdbutil.UnspecifiedL4Protocol) + legacyACL := libovsdbops.BuildACL( + libovsdbutil.GetACLName(legacyACLID), + nbdb.ACLDirectionToLport, + types.DefaultAllowPriority, + fmt.Sprintf("ip4.src == 1.1.1.1 && outport == @%s", pgName2), + nbdb.ACLActionAllowRelated, + types.OvnACLLoggingMeter, + "", + false, + legacyACLID.GetExternalIDs(), + nil, + types.DefaultACLTier, + ) + legacyACL.UUID = legacyACLID.String() + "-UUID" + initialData = append(initialData, legacyACL) + + legacyACLID2 := secondgp2.getNetpolACLDbIDs(0, libovsdbutil.UnspecifiedL4Protocol) + legacyACL2 := libovsdbops.BuildACL( + libovsdbutil.GetACLName(legacyACLID2), + nbdb.ACLDirectionToLport, + types.DefaultAllowPriority, + fmt.Sprintf("ip4.src == 1.1.1.1 && outport == @%s", pgName2), + nbdb.ACLActionAllowRelated, + types.OvnACLLoggingMeter, + "", + false, + legacyACLID2.GetExternalIDs(), + nil, + types.DefaultACLTier, + ) + legacyACL2.UUID = legacyACLID2.String() + "-UUID" + initialData = append(initialData, legacyACL2) + pgNetworkPolicy2 := getPolicyPortGroup(newNetpolDataParams(networkPolicy2), []*nbdb.ACL{legacyACL, legacyACL2}) + initialData = append(initialData, pgNetworkPolicy2) + + startOvn(libovsdbtest.TestSetup{NBData: initialData}, []corev1.Namespace{namespace1}, + []knet.NetworkPolicy{*networkPolicy1, *networkPolicy2}, + nil, nil) + + 
// check the initial data is updated and all legacy ACLs should be cleaned up + gressPolicy1ExpectedData := getPolicyData(newNetpolDataParams(networkPolicy1)) + gressPolicy2ExpectedData := getPolicyData(newNetpolDataParams(networkPolicy2)) + finalData := initialDB.NBData + finalData = append(finalData, namespace1AddressSetv4) + finalData = append(finalData, gressPolicy1ExpectedData...) + finalData = append(finalData, gressPolicy2ExpectedData...) + finalData = append(finalData, defaultDenyExpectedData...) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalData)) + + return nil + } + gomega.Expect(app.Run([]string{app.Name})).To(gomega.Succeed()) + }) }) ginkgo.Context("during execution", func() { From 843ceff70ae02a2ad89b128f7f2c0b98e0107556 Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Wed, 18 Feb 2026 10:04:05 -0500 Subject: [PATCH 41/59] Trivial E2E egress IP fixes - Changed the MTU test external container to start directly with netexec instead of creating it with pause and launching netexec in a detached goroutine. - We need to set the PMTU sysctl before the socket is opened for the application. - Fixed curlErr that was being shadowed and not evaluated properly. 
Signed-off-by: Tim Rozet --- test/e2e/egressip.go | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/test/e2e/egressip.go b/test/e2e/egressip.go index 6e7d75f147..b795e73bef 100644 --- a/test/e2e/egressip.go +++ b/test/e2e/egressip.go @@ -2160,35 +2160,24 @@ spec: providerPrimaryNetwork, err := infraprovider.Get().PrimaryNetwork() framework.ExpectNoError(err, "failed to get providers primary network") externalContainerPrimary := infraapi.ExternalContainer{Name: "external-container-for-egressip-mtu-test", Image: images.AgnHost(), - Network: providerPrimaryNetwork, CmdArgs: []string{"pause"}, ExtPort: externalContainerPrimaryPort} + Network: providerPrimaryNetwork, RuntimeArgs: []string{"--sysctl", "net.ipv4.ip_no_pmtu_disc=2"}, + CmdArgs: []string{"netexec", httpPort, udpPort}, ExtPort: externalContainerPrimaryPort} externalContainerPrimary, err = providerCtx.CreateExternalContainer(externalContainerPrimary) framework.ExpectNoError(err, "failed to create external container: %s", externalContainerPrimary.String()) - // First disable PMTUD - _, err = infraprovider.Get().ExecExternalContainerCommand(externalContainerPrimary, []string{"sysctl", "-w", "net.ipv4.ip_no_pmtu_disc=2"}) - framework.ExpectNoError(err, "disabling PMTUD in the external kind container failed: %v", err) - providerCtx.AddCleanUpFn(func() error { - _, err = infraprovider.Get().ExecExternalContainerCommand(externalContainerPrimary, []string{"sysctl", "-w", "net.ipv4.ip_no_pmtu_disc=0"}) - return err - }) - - go func() { - _, _ = infraprovider.Get().ExecExternalContainerCommand(externalContainerPrimary, []string{"/agnhost", "netexec", httpPort, udpPort}) - }() - ginkgo.By("Checking connectivity to the external kind container and verify that the source IP is the egress IP") var curlErr error - _ = wait.PollUntilContextTimeout( + err = wait.PollUntilContextTimeout( context.Background(), retryInterval, retryTimeout, true, func(ctx context.Context) (bool, error) { - 
curlErr := curlAgnHostClientIPFromPod(podNamespace.Name, pod1Name, egressIP1.String(), externalContainerPrimary.GetIPv4(), externalContainerPrimary.GetPortStr()) + curlErr = curlAgnHostClientIPFromPod(podNamespace.Name, pod1Name, egressIP1.String(), externalContainerPrimary.GetIPv4(), externalContainerPrimary.GetPortStr()) return curlErr == nil, nil }, ) - framework.ExpectNoError(curlErr, "connectivity check to the external kind container failed: %v", curlErr) + framework.ExpectNoError(err, "connectivity check to the external kind container failed: %v", curlErr) // We will ask the server to reply with a UDP packet bigger than the pod // network MTU. Since PMTUD has been disabled on the server, the reply From 75d2f8e4e8968b74eeb1dc586623859365e11682 Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Thu, 19 Feb 2026 15:15:36 -0500 Subject: [PATCH 42/59] e2e: fix static IP collision in ValidatingAdmissionPolicy test Create the static-IP pod before the dynamically-allocated pod to prevent the allocator from assigning the same IP (103.0.0.3) to the unannotated pod first, causing a permanent "provided IP is already allocated" error for the annotated pod. 
Signed-off-by: Ihar Hrachyshka Assisted-by: opus (claude-opus-4-5-20251101) --- .../network_segmentation_default_network_annotation.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/e2e/network_segmentation_default_network_annotation.go b/test/e2e/network_segmentation_default_network_annotation.go index 4e42658588..fce9005126 100644 --- a/test/e2e/network_segmentation_default_network_annotation.go +++ b/test/e2e/network_segmentation_default_network_annotation.go @@ -177,11 +177,6 @@ var _ = Describe("Network Segmentation: Default network multus annotation", feat Expect(err).NotTo(HaveOccurred(), "Should create UserDefinedNetwork") Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, udn.Namespace, udn.Name), 5*time.Second, time.Second).Should(Succeed()) - By("Creating a pod without the default-network annotation") - podWithoutAnnotation := e2epod.NewAgnhostPod(f.Namespace.Name, "pod-without-annotation", nil, nil, nil) - podWithoutAnnotation.Spec.Containers[0].Command = []string{"sleep", "infinity"} - podWithoutAnnotation = e2epod.NewPodClient(f).CreateSync(context.TODO(), podWithoutAnnotation) - By("Creating a pod with the default-network annotation") nse := []nadapi.NetworkSelectionElement{{ @@ -200,6 +195,11 @@ var _ = Describe("Network Segmentation: Default network multus annotation", feat podWithAnnotation.Spec.Containers[0].Command = []string{"sleep", "infinity"} podWithAnnotation = e2epod.NewPodClient(f).CreateSync(context.TODO(), podWithAnnotation) + By("Creating a pod without the default-network annotation") + podWithoutAnnotation := e2epod.NewAgnhostPod(f.Namespace.Name, "pod-without-annotation", nil, nil, nil) + podWithoutAnnotation.Spec.Containers[0].Command = []string{"sleep", "infinity"} + podWithoutAnnotation = e2epod.NewPodClient(f).CreateSync(context.TODO(), podWithoutAnnotation) + By("Attempting to add the default-network annotation to the pod without annotation") podWithoutAnnotation.Annotations = 
map[string]string{ "v1.multus-cni.io/default-network": string(marshalledNSE), From 58c5dc9360315bab7fc69cbfab9fdd8ecf69a5b7 Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Tue, 13 May 2025 19:37:41 -0400 Subject: [PATCH 43/59] Adds support for bypassing network policy for ICMP/ICMPv6 Now with a new feature option "allow-icmp-network-policy", network policy will not block ICMP/ICMPv6 between pods. This is a global setting that affects all pods and namespaces in a cluster. Signed-off-by: Tim Rozet --- go-controller/pkg/config/config.go | 7 + .../pkg/ovn/base_network_controller_policy.go | 34 ++-- go-controller/pkg/ovn/multipolicy_test.go | 151 +++++++++--------- go-controller/pkg/ovn/policy_stale_test.go | 95 ++++++----- go-controller/pkg/ovn/policy_test.go | 130 ++++++++++----- 5 files changed, 255 insertions(+), 162 deletions(-) diff --git a/go-controller/pkg/config/config.go b/go-controller/pkg/config/config.go index 55c985a187..2f6cf72f24 100644 --- a/go-controller/pkg/config/config.go +++ b/go-controller/pkg/config/config.go @@ -507,6 +507,7 @@ type OVNKubernetesFeatureConfig struct { EnableServiceTemplateSupport bool `gcfg:"enable-svc-template-support"` EnableObservability bool `gcfg:"enable-observability"` EnableNetworkQoS bool `gcfg:"enable-network-qos"` + AllowICMPNetworkPolicy bool `gcfg:"allow-icmp-network-policy"` // This feature requires a kernel fix https://github.com/torvalds/linux/commit/7f3287db654395f9c5ddd246325ff7889f550286 // to work on a kind cluster. Flag allows to disable it for current CI, will be turned on when github runners have this fix. 
AdvertisedUDNIsolationMode string `gcfg:"advertised-udn-isolation-mode"` @@ -1267,6 +1268,12 @@ var OVNK8sFeatureFlags = []cli.Flag{ Destination: &cliConfig.OVNKubernetesFeature.EnableStatelessNetPol, Value: OVNKubernetesFeature.EnableStatelessNetPol, }, + &cli.BoolFlag{ + Name: "allow-icmp-network-policy", + Usage: "Allow ICMP/ICMPv6 traffic to bypass NetworkPolicy default-deny rules.", + Destination: &cliConfig.OVNKubernetesFeature.AllowICMPNetworkPolicy, + Value: OVNKubernetesFeature.AllowICMPNetworkPolicy, + }, &cli.BoolFlag{ Name: "enable-interconnect", Usage: "Enable interconnecting multiple zones.", diff --git a/go-controller/pkg/ovn/base_network_controller_policy.go b/go-controller/pkg/ovn/base_network_controller_policy.go index 223f13f6a2..5507c23bc0 100644 --- a/go-controller/pkg/ovn/base_network_controller_policy.go +++ b/go-controller/pkg/ovn/base_network_controller_policy.go @@ -35,7 +35,10 @@ const ( // netpolDefaultDenyACLType is used to distinguish default deny and arp allow acls create for the same port group defaultDenyACL netpolDefaultDenyACLType = "defaultDeny" arpAllowACL netpolDefaultDenyACLType = "arpAllow" + icmpAllowACL netpolDefaultDenyACLType = "icmpAllow" + // icmpAllowPolicyMatch is the match used when creating default allow ICMP and ICMPv6 ACLs for a namespace + icmpAllowPolicyMatch = "(icmp || icmp6)" // arpAllowPolicyMatch is the match used when creating default allow ARP ACLs for a namespace arpAllowPolicyMatch = "(arp || nd)" allowHairpinningACLID = "allow-hairpinning" @@ -383,16 +386,22 @@ func (bnc *BaseNetworkController) defaultDenyPortGroupName(namespace string, acl } func (bnc *BaseNetworkController) buildDenyACLs(namespace, pgName string, aclLogging *libovsdbutil.ACLLoggingLevels, - aclDir libovsdbutil.ACLDirection) (denyACL, allowACL *nbdb.ACL) { + aclDir libovsdbutil.ACLDirection) []*nbdb.ACL { denyMatch := libovsdbutil.GetACLMatch(pgName, "", aclDir) - allowMatch := libovsdbutil.GetACLMatch(pgName, arpAllowPolicyMatch, 
aclDir) + allowARPMatch := libovsdbutil.GetACLMatch(pgName, arpAllowPolicyMatch, aclDir) aclPipeline := libovsdbutil.ACLDirectionToACLPipeline(aclDir) - denyACL = libovsdbutil.BuildACLWithDefaultTier(bnc.getDefaultDenyPolicyACLIDs(namespace, aclDir, defaultDenyACL), - types.DefaultDenyPriority, denyMatch, nbdb.ACLActionDrop, aclLogging, aclPipeline) - allowACL = libovsdbutil.BuildACLWithDefaultTier(bnc.getDefaultDenyPolicyACLIDs(namespace, aclDir, arpAllowACL), - types.DefaultAllowPriority, allowMatch, nbdb.ACLActionAllow, nil, aclPipeline) - return + acls := make([]*nbdb.ACL, 0, 3) + acls = append(acls, libovsdbutil.BuildACLWithDefaultTier(bnc.getDefaultDenyPolicyACLIDs(namespace, aclDir, defaultDenyACL), + types.DefaultDenyPriority, denyMatch, nbdb.ACLActionDrop, aclLogging, aclPipeline)) + acls = append(acls, libovsdbutil.BuildACLWithDefaultTier(bnc.getDefaultDenyPolicyACLIDs(namespace, aclDir, arpAllowACL), + types.DefaultAllowPriority, allowARPMatch, nbdb.ACLActionAllow, nil, aclPipeline)) + if config.OVNKubernetesFeature.AllowICMPNetworkPolicy { + allowICMPMatch := libovsdbutil.GetACLMatch(pgName, icmpAllowPolicyMatch, aclDir) + acls = append(acls, libovsdbutil.BuildACLWithDefaultTier(bnc.getDefaultDenyPolicyACLIDs(namespace, aclDir, icmpAllowACL), + types.DefaultAllowPriority, allowICMPMatch, nbdb.ACLActionAllow, nil, aclPipeline)) + } + return acls } func (bnc *BaseNetworkController) addPolicyToDefaultPortGroups(np *networkPolicy, aclLogging *libovsdbutil.ACLLoggingLevels) error { @@ -439,17 +448,18 @@ func (bnc *BaseNetworkController) delPolicyFromDefaultPortGroups(np *networkPoli func (bnc *BaseNetworkController) createDefaultDenyPGAndACLs(namespace, policy string, aclLogging *libovsdbutil.ACLLoggingLevels) error { ingressPGIDs := bnc.getDefaultDenyPolicyPortGroupIDs(namespace, libovsdbutil.ACLIngress) ingressPGName := libovsdbutil.GetPortGroupName(ingressPGIDs) - ingressDenyACL, ingressAllowACL := bnc.buildDenyACLs(namespace, ingressPGName, aclLogging, 
libovsdbutil.ACLIngress) + ingressACLs := bnc.buildDenyACLs(namespace, ingressPGName, aclLogging, libovsdbutil.ACLIngress) egressPGIDs := bnc.getDefaultDenyPolicyPortGroupIDs(namespace, libovsdbutil.ACLEgress) egressPGName := libovsdbutil.GetPortGroupName(egressPGIDs) - egressDenyACL, egressAllowACL := bnc.buildDenyACLs(namespace, egressPGName, aclLogging, libovsdbutil.ACLEgress) - ops, err := libovsdbops.CreateOrUpdateACLsOps(bnc.nbClient, nil, bnc.GetSamplingConfig(), ingressDenyACL, ingressAllowACL, egressDenyACL, egressAllowACL) + egressACLs := bnc.buildDenyACLs(namespace, egressPGName, aclLogging, libovsdbutil.ACLEgress) + allACLs := append(ingressACLs, egressACLs...) + ops, err := libovsdbops.CreateOrUpdateACLsOps(bnc.nbClient, nil, bnc.GetSamplingConfig(), allACLs...) if err != nil { return err } - ingressPG := libovsdbutil.BuildPortGroup(ingressPGIDs, nil, []*nbdb.ACL{ingressDenyACL, ingressAllowACL}) - egressPG := libovsdbutil.BuildPortGroup(egressPGIDs, nil, []*nbdb.ACL{egressDenyACL, egressAllowACL}) + ingressPG := libovsdbutil.BuildPortGroup(ingressPGIDs, nil, ingressACLs) + egressPG := libovsdbutil.BuildPortGroup(egressPGIDs, nil, egressACLs) ops, err = libovsdbops.CreateOrUpdatePortGroupsOps(bnc.nbClient, ops, ingressPG, egressPG) if err != nil { return err diff --git a/go-controller/pkg/ovn/multipolicy_test.go b/go-controller/pkg/ovn/multipolicy_test.go index 0d6ea4b2d3..5dfc0f59dc 100644 --- a/go-controller/pkg/ovn/multipolicy_test.go +++ b/go-controller/pkg/ovn/multipolicy_test.go @@ -454,91 +454,96 @@ var _ = ginkgo.Describe("OVN MultiNetworkPolicy Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - ginkgo.It("correctly creates and deletes network policy and multi network policy with the same policy", func() { - app.Action = func(*cli.Context) error { - var err error - - topology := ovntypes.Layer2Topology - subnets := "10.1.0.0/24" - setUserDefinedNetworkTestData(topology, subnets) - - namespace1 := 
*newNamespace(namespaceName1) - nPodTest := getTestPod(namespace1.Name, nodeName) - nPodTest.addNetwork(userDefinedNetworkName, nadNamespacedName, "", "", "", "10.1.1.1", "0a:58:0a:01:01:01", "secondary", 1, nil) - networkPolicy := getPortNetworkPolicy(netPolicyName1, namespace1.Name, labelName, labelVal, portNum) - - watchNodes := false - node := *newNode(nodeName, "192.168.126.202/24") + ginkgo.DescribeTable("correctly creates and deletes network policy and multi network policy with the same policy", + func(allowICMPNetworkPolicy bool) { + app.Action = func(*cli.Context) error { + var err error - startOvn(initialDB, watchNodes, []corev1.Node{node}, []corev1.Namespace{namespace1}, nil, nil, - []nettypes.NetworkAttachmentDefinition{*nad}, []testPod{nPodTest}, map[string]string{labelName: labelVal}) + config.OVNKubernetesFeature.AllowICMPNetworkPolicy = allowICMPNetworkPolicy + topology := ovntypes.Layer2Topology + subnets := "10.1.0.0/24" + setUserDefinedNetworkTestData(topology, subnets) - ginkgo.By("Creating networkPolicy applied to the pod") - _, err = fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). - Create(context.TODO(), networkPolicy, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + namespace1 := *newNamespace(namespaceName1) + nPodTest := getTestPod(namespace1.Name, nodeName) + nPodTest.addNetwork(userDefinedNetworkName, nadNamespacedName, "", "", "", "10.1.1.1", "0a:58:0a:01:01:01", "secondary", 1, nil) + networkPolicy := getPortNetworkPolicy(netPolicyName1, namespace1.Name, labelName, labelVal, portNum) - _, err = fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). 
- Get(context.TODO(), networkPolicy.Name, metav1.GetOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - fakeOvn.asf.ExpectAddressSetWithAddresses(namespaceName1, []string{nPodTest.podIP}) + watchNodes := false + node := *newNode(nodeName, "192.168.126.202/24") - dataParams := newNetpolDataParams(networkPolicy). - withLocalPortUUIDs(nPodTest.portUUID). - withTCPPeerPorts(portNum) - gressPolicyExpectedData1 := getPolicyData(dataParams) - defaultDenyExpectedData1 := getDefaultDenyData(dataParams) - initData := getUpdatedInitialDB([]testPod{nPodTest}) - expectedData1 := append(initData, gressPolicyExpectedData1...) - expectedData1 = append(expectedData1, defaultDenyExpectedData1...) - gomega.Eventually(fakeOvn.nbClient).Should(libovsdb.HaveData(expectedData1...)) + startOvn(initialDB, watchNodes, []corev1.Node{node}, []corev1.Namespace{namespace1}, nil, nil, + []nettypes.NetworkAttachmentDefinition{*nad}, []testPod{nPodTest}, map[string]string{labelName: labelVal}) - ginkgo.By("Creating multi-networkPolicy applied to the pod") - mpolicy := convertNetPolicyToMultiNetPolicy(networkPolicy) - mpolicy.Annotations = map[string]string{PolicyForAnnotation: nadNamespacedName} + ginkgo.By("Creating networkPolicy applied to the pod") + _, err = fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). + Create(context.TODO(), networkPolicy, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - _, err = fakeOvn.fakeClient.MultiNetworkPolicyClient.K8sCniCncfIoV1beta1().MultiNetworkPolicies(mpolicy.Namespace). - Create(context.TODO(), mpolicy, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). 
+ Get(context.TODO(), networkPolicy.Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.asf.ExpectAddressSetWithAddresses(namespaceName1, []string{nPodTest.podIP}) + + dataParams := newNetpolDataParams(networkPolicy). + withLocalPortUUIDs(nPodTest.portUUID). + withTCPPeerPorts(portNum) + gressPolicyExpectedData1 := getPolicyData(dataParams) + defaultDenyExpectedData1 := getDefaultDenyData(dataParams) + initData := getUpdatedInitialDB([]testPod{nPodTest}) + expectedData1 := append(initData, gressPolicyExpectedData1...) + expectedData1 = append(expectedData1, defaultDenyExpectedData1...) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdb.HaveData(expectedData1...)) + + ginkgo.By("Creating multi-networkPolicy applied to the pod") + mpolicy := convertNetPolicyToMultiNetPolicy(networkPolicy) + mpolicy.Annotations = map[string]string{PolicyForAnnotation: nadNamespacedName} + + _, err = fakeOvn.fakeClient.MultiNetworkPolicyClient.K8sCniCncfIoV1beta1().MultiNetworkPolicies(mpolicy.Namespace). + Create(context.TODO(), mpolicy, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - _, err = fakeOvn.fakeClient.MultiNetworkPolicyClient.K8sCniCncfIoV1beta1().MultiNetworkPolicies(mpolicy.Namespace). - Get(context.TODO(), mpolicy.Name, metav1.GetOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeOvn.fakeClient.MultiNetworkPolicyClient.K8sCniCncfIoV1beta1().MultiNetworkPolicies(mpolicy.Namespace). 
+ Get(context.TODO(), mpolicy.Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ocInfo := fakeOvn.userDefinedNetworkControllers[userDefinedNetworkName] - portInfo := nPodTest.getNetworkPortInfo(userDefinedNetworkName, nadNamespacedName) - gomega.Expect(portInfo).NotTo(gomega.BeNil()) - ocInfo.asf.ExpectAddressSetWithAddresses(namespaceName1, []string{portInfo.podIP}) + ocInfo := fakeOvn.userDefinedNetworkControllers[userDefinedNetworkName] + portInfo := nPodTest.getNetworkPortInfo(userDefinedNetworkName, nadNamespacedName) + gomega.Expect(portInfo).NotTo(gomega.BeNil()) + ocInfo.asf.ExpectAddressSetWithAddresses(namespaceName1, []string{portInfo.podIP}) + + dataParams2 := newNetpolDataParams(networkPolicy). + withLocalPortUUIDs(portInfo.portUUID). + withTCPPeerPorts(portNum). + withNetInfo(netInfo) + gressPolicyExpectedData2 := getPolicyData(dataParams2) + defaultDenyExpectedData2 := getDefaultDenyData(dataParams2) + expectedData2 := append(expectedData1, gressPolicyExpectedData2...) + expectedData2 = append(expectedData2, defaultDenyExpectedData2...) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdb.HaveData(expectedData2...)) + + // Delete the multi network policy + ginkgo.By("Deleting the multi network policy") + err = fakeOvn.fakeClient.MultiNetworkPolicyClient.K8sCniCncfIoV1beta1().MultiNetworkPolicies(mpolicy.Namespace). + Delete(context.TODO(), mpolicy.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdb.HaveData(expectedData1)) - dataParams2 := newNetpolDataParams(networkPolicy). - withLocalPortUUIDs(portInfo.portUUID). - withTCPPeerPorts(portNum). - withNetInfo(netInfo) - gressPolicyExpectedData2 := getPolicyData(dataParams2) - defaultDenyExpectedData2 := getDefaultDenyData(dataParams2) - expectedData2 := append(expectedData1, gressPolicyExpectedData2...) - expectedData2 = append(expectedData2, defaultDenyExpectedData2...) 
- gomega.Eventually(fakeOvn.nbClient).Should(libovsdb.HaveData(expectedData2...)) + ginkgo.By("Deleting the network policy") + err = fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). + Delete(context.TODO(), networkPolicy.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // Delete the multi network policy - ginkgo.By("Deleting the multi network policy") - err = fakeOvn.fakeClient.MultiNetworkPolicyClient.K8sCniCncfIoV1beta1().MultiNetworkPolicies(mpolicy.Namespace). - Delete(context.TODO(), mpolicy.Name, metav1.DeleteOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(fakeOvn.nbClient).Should(libovsdb.HaveData(expectedData1)) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdb.HaveData(initData)) + return nil + } - ginkgo.By("Deleting the network policy") - err = fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). - Delete(context.TODO(), networkPolicy.Name, metav1.DeleteOptions{}) + err := app.Run([]string{app.Name}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(fakeOvn.nbClient).Should(libovsdb.HaveData(initData)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) + }, + ginkgo.Entry("with allow ICMP network policy disabled", false), + ginkgo.Entry("with allow ICMP network policy enabled", true), + ) ginkgo.DescribeTable("correctly adds and deletes pod IPs from secondary network namespace address set", func(topology string, remote bool) { diff --git a/go-controller/pkg/ovn/policy_stale_test.go b/go-controller/pkg/ovn/policy_stale_test.go index c1bc791f14..91b881833a 100644 --- a/go-controller/pkg/ovn/policy_stale_test.go +++ b/go-controller/pkg/ovn/policy_stale_test.go @@ -3,6 +3,7 @@ package ovn import ( "context" "fmt" + "strings" "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" @@ -40,6 +41,9 @@ func getStaleDefaultDenyACL(netpolName, 
namespace, match string, deny, egress bo name := namespace + "_" + netpolName if !deny { aclIDs = fakeController.getDefaultDenyPolicyACLIDs(namespace, direction, arpAllowACL) + if strings.Contains(match, "icmp") { + aclIDs = fakeController.getDefaultDenyPolicyACLIDs(namespace, direction, icmpAllowACL) + } priority = types.DefaultAllowPriority action = nbdb.ACLActionAllow name = getStaleARPAllowACLName(namespace) @@ -73,34 +77,44 @@ func getStaleDefaultDenyData(networkPolicy *knet.NetworkPolicy) []libovsdbtest.T egressPGName := fakeController.defaultDenyPortGroupName(namespace, libovsdbutil.ACLEgress) egressDenyACL := getStaleDefaultDenyACL(netpolName, namespace, "inport == @"+egressPGName, true, true) - egressAllowACL := getStaleDefaultDenyACL(netpolName, namespace, "inport == @"+egressPGName+" && "+arpAllowPolicyMatch, false, true) + egressARPAllowACL := getStaleDefaultDenyACL(netpolName, namespace, "inport == @"+egressPGName+" && "+arpAllowPolicyMatch, false, true) + + testData := []libovsdbtest.TestData{egressDenyACL, egressARPAllowACL} + egressACLs := []*nbdb.ACL{egressDenyACL, egressARPAllowACL} + + if config.OVNKubernetesFeature.AllowICMPNetworkPolicy { + egressICMPAllowACL := getStaleDefaultDenyACL(netpolName, namespace, "inport == @"+egressPGName+" && "+icmpAllowPolicyMatch, false, true) + testData = append(testData, egressICMPAllowACL) + egressACLs = append(egressACLs, egressICMPAllowACL) + } ingressPGName := fakeController.defaultDenyPortGroupName(namespace, libovsdbutil.ACLIngress) ingressDenyACL := getStaleDefaultDenyACL(netpolName, namespace, "outport == @"+ingressPGName, true, false) - ingressAllowACL := getStaleDefaultDenyACL(netpolName, namespace, "outport == @"+ingressPGName+" && "+arpAllowPolicyMatch, false, false) + ingressARPAllowACL := getStaleDefaultDenyACL(netpolName, namespace, "outport == @"+ingressPGName+" && "+arpAllowPolicyMatch, false, false) + + ingressACLs := []*nbdb.ACL{ingressDenyACL, ingressARPAllowACL} + testData = 
append(testData, ingressDenyACL, ingressARPAllowACL) + if config.OVNKubernetesFeature.AllowICMPNetworkPolicy { + ingressICMPAllowACL := getStaleDefaultDenyACL(netpolName, namespace, "outport == @"+ingressPGName+" && "+icmpAllowPolicyMatch, false, false) + testData = append(testData, ingressICMPAllowACL) + ingressACLs = append(ingressACLs, ingressICMPAllowACL) + } egressDenyPG := libovsdbutil.BuildPortGroup( fakeController.getDefaultDenyPolicyPortGroupIDs(namespace, libovsdbutil.ACLEgress), nil, - []*nbdb.ACL{egressDenyACL, egressAllowACL}, + egressACLs, ) egressDenyPG.UUID = egressDenyPG.Name + "-UUID" ingressDenyPG := libovsdbutil.BuildPortGroup( fakeController.getDefaultDenyPolicyPortGroupIDs(namespace, libovsdbutil.ACLIngress), nil, - []*nbdb.ACL{ingressDenyACL, ingressAllowACL}, + ingressACLs, ) ingressDenyPG.UUID = ingressDenyPG.Name + "-UUID" - return []libovsdbtest.TestData{ - egressDenyACL, - egressAllowACL, - ingressDenyACL, - ingressAllowACL, - egressDenyPG, - ingressDenyPG, - } + return append(testData, egressDenyPG, ingressDenyPG) } // getStalePolicyACLs builds stale ACLs for given peers @@ -250,32 +264,37 @@ var _ = ginkgo.Describe("OVN Stale NetworkPolicy Operations", func() { ginkgo.Context("on startup", func() { - ginkgo.It("reconciles an existing networkPolicy updating stale ACLs", func() { - namespace1 := *newNamespace(namespaceName1) - namespace2 := *newNamespace(namespaceName2) - networkPolicy := getMatchLabelsNetworkPolicy(netPolicyName1, namespace1.Name, - namespace2.Name, "", true, true) - // start with stale ACLs - gressPolicyInitialData := getStalePolicyData(networkPolicy, []string{namespace2.Name}) - defaultDenyInitialData := getStaleDefaultDenyData(networkPolicy) - initialData := initialDB.NBData - initialData = append(initialData, gressPolicyInitialData...) - initialData = append(initialData, defaultDenyInitialData...) 
- startOvn(libovsdbtest.TestSetup{NBData: initialData}, []corev1.Namespace{namespace1, namespace2}, - []knet.NetworkPolicy{*networkPolicy}) - - fakeOvn.asf.ExpectEmptyAddressSet(namespaceName1) - fakeOvn.asf.ExpectEmptyAddressSet(namespaceName2) - - _, err := fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). - Get(context.TODO(), networkPolicy.Name, metav1.GetOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // make sure stale ACLs were updated - expectedData := getNamespaceWithSinglePolicyExpectedData( - newNetpolDataParams(networkPolicy).withPeerNamespaces(namespace2.Name), - initialDB.NBData) - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedData...)) - }) + ginkgo.DescribeTable("reconciles an existing networkPolicy updating stale ACLs", + func(allowICMPNetworkPolicy bool) { + config.OVNKubernetesFeature.AllowICMPNetworkPolicy = allowICMPNetworkPolicy + namespace1 := *newNamespace(namespaceName1) + namespace2 := *newNamespace(namespaceName2) + networkPolicy := getMatchLabelsNetworkPolicy(netPolicyName1, namespace1.Name, + namespace2.Name, "", true, true) + // start with stale ACLs + gressPolicyInitialData := getStalePolicyData(networkPolicy, []string{namespace2.Name}) + defaultDenyInitialData := getStaleDefaultDenyData(networkPolicy) + initialData := initialDB.NBData + initialData = append(initialData, gressPolicyInitialData...) + initialData = append(initialData, defaultDenyInitialData...) + startOvn(libovsdbtest.TestSetup{NBData: initialData}, []corev1.Namespace{namespace1, namespace2}, + []knet.NetworkPolicy{*networkPolicy}) + + fakeOvn.asf.ExpectEmptyAddressSet(namespaceName1) + fakeOvn.asf.ExpectEmptyAddressSet(namespaceName2) + + _, err := fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). 
+ Get(context.TODO(), networkPolicy.Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // make sure stale ACLs were updated + expectedData := getNamespaceWithSinglePolicyExpectedData( + newNetpolDataParams(networkPolicy).withPeerNamespaces(namespace2.Name), + initialDB.NBData) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedData...)) + }, + ginkgo.Entry("with allow ICMP network policy disabled", false), + ginkgo.Entry("with allow ICMP network policy enabled", true), + ) ginkgo.It("reconciles an existing networkPolicy updating stale ACLs with long names", func() { longNamespaceName63 := "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijk" // longest allowed namespace name diff --git a/go-controller/pkg/ovn/policy_test.go b/go-controller/pkg/ovn/policy_test.go index af7923a5cf..1dfe375dc4 100644 --- a/go-controller/pkg/ovn/policy_test.go +++ b/go-controller/pkg/ovn/policy_test.go @@ -108,7 +108,7 @@ func getDefaultDenyDataHelper(policyTypeIngress, policyTypeEgress bool, params * egressDenyACL.UUID = aclIDs.String() + "-UUID" aclIDs = fakeController.getDefaultDenyPolicyACLIDs(namespace, libovsdbutil.ACLEgress, arpAllowACL) - egressAllowACL := libovsdbops.BuildACL( + egressARPAllowACL := libovsdbops.BuildACL( libovsdbutil.GetACLName(aclIDs), nbdb.ACLDirectionFromLport, types.DefaultAllowPriority, @@ -123,7 +123,36 @@ func getDefaultDenyDataHelper(policyTypeIngress, policyTypeEgress bool, params * }, types.DefaultACLTier, ) - egressAllowACL.UUID = aclIDs.String() + "-UUID" + egressARPAllowACL.UUID = aclIDs.String() + "-UUID" + + testData := []libovsdbtest.TestData{ + egressDenyACL, + egressARPAllowACL, + } + egressACLs := []*nbdb.ACL{egressDenyACL, egressARPAllowACL} + + if config.OVNKubernetesFeature.AllowICMPNetworkPolicy { + aclIDs = fakeController.getDefaultDenyPolicyACLIDs(namespace, libovsdbutil.ACLEgress, icmpAllowACL) + egressICMPAllowACL := libovsdbops.BuildACL( + 
libovsdbutil.GetACLName(aclIDs), + nbdb.ACLDirectionFromLport, + types.DefaultAllowPriority, + "inport == @"+egressPGName+" && "+icmpAllowPolicyMatch, + nbdb.ACLActionAllow, + types.OvnACLLoggingMeter, + "", + false, + aclIDs.GetExternalIDs(), + map[string]string{ + "apply-after-lb": "true", + }, + types.DefaultACLTier, + ) + egressICMPAllowACL.UUID = aclIDs.String() + "-UUID" + testData = append(testData, egressICMPAllowACL) + egressACLs = append(egressACLs, egressICMPAllowACL) + + } ingressPGName := fakeController.defaultDenyPortGroupName(namespace, libovsdbutil.ACLIngress) aclIDs = fakeController.getDefaultDenyPolicyACLIDs(namespace, libovsdbutil.ACLIngress, defaultDenyACL) @@ -143,7 +172,7 @@ func getDefaultDenyDataHelper(policyTypeIngress, policyTypeEgress bool, params * ingressDenyACL.UUID = aclIDs.String() + "-UUID" aclIDs = fakeController.getDefaultDenyPolicyACLIDs(namespace, libovsdbutil.ACLIngress, arpAllowACL) - ingressAllowACL := libovsdbops.BuildACL( + ingressARPAllowACL := libovsdbops.BuildACL( libovsdbutil.GetACLName(aclIDs), nbdb.ACLDirectionToLport, types.DefaultAllowPriority, @@ -156,7 +185,31 @@ func getDefaultDenyDataHelper(policyTypeIngress, policyTypeEgress bool, params * nil, types.DefaultACLTier, ) - ingressAllowACL.UUID = aclIDs.String() + "-UUID" + ingressARPAllowACL.UUID = aclIDs.String() + "-UUID" + + ingressACLs := []*nbdb.ACL{ingressDenyACL, ingressARPAllowACL} + if config.OVNKubernetesFeature.AllowICMPNetworkPolicy { + aclIDs = fakeController.getDefaultDenyPolicyACLIDs(namespace, libovsdbutil.ACLIngress, icmpAllowACL) + ingressICMPAllowACL := libovsdbops.BuildACL( + libovsdbutil.GetACLName(aclIDs), + nbdb.ACLDirectionToLport, + types.DefaultAllowPriority, + "outport == @"+ingressPGName+" && "+icmpAllowPolicyMatch, + nbdb.ACLActionAllow, + types.OvnACLLoggingMeter, + "", + false, + aclIDs.GetExternalIDs(), + nil, + types.DefaultACLTier, + ) + ingressICMPAllowACL.UUID = aclIDs.String() + "-UUID" + ingressACLs = append(ingressACLs, 
ingressICMPAllowACL) + } + + for _, acl := range ingressACLs { + testData = append(testData, acl) + } lsps := []*nbdb.LogicalSwitchPort{} for _, uuid := range params.localPortUUIDs { @@ -167,10 +220,11 @@ func getDefaultDenyDataHelper(policyTypeIngress, policyTypeEgress bool, params * if policyTypeEgress { egressDenyPorts = lsps } + egressDenyPG := libovsdbutil.BuildPortGroup( fakeController.getDefaultDenyPolicyPortGroupIDs(namespace, libovsdbutil.ACLEgress), egressDenyPorts, - []*nbdb.ACL{egressDenyACL, egressAllowACL}, + egressACLs, ) egressDenyPG.UUID = egressDenyPG.Name + "-UUID" @@ -181,18 +235,11 @@ func getDefaultDenyDataHelper(policyTypeIngress, policyTypeEgress bool, params * ingressDenyPG := libovsdbutil.BuildPortGroup( fakeController.getDefaultDenyPolicyPortGroupIDs(namespace, libovsdbutil.ACLIngress), ingressDenyPorts, - []*nbdb.ACL{ingressDenyACL, ingressAllowACL}, + ingressACLs, ) ingressDenyPG.UUID = ingressDenyPG.Name + "-UUID" - return []libovsdbtest.TestData{ - egressDenyACL, - egressAllowACL, - ingressDenyACL, - ingressAllowACL, - egressDenyPG, - ingressDenyPG, - } + return append(testData, egressDenyPG, ingressDenyPG) } func getDefaultDenyData(params *netpolDataParams) []libovsdbtest.TestData { @@ -797,33 +844,38 @@ var _ = ginkgo.Describe("OVN NetworkPolicy Operations", func() { gomega.Expect(app.Run([]string{app.Name})).To(gomega.Succeed()) }) - ginkgo.It("reconciles an existing networkPolicy with empty db", func() { - app.Action = func(*cli.Context) error { - namespace1 := *newNamespace(namespaceName1) - namespace2 := *newNamespace(namespaceName2) - namespace1AddressSetv4, _ := buildNamespaceAddressSets(namespace1.Name, nil) - namespace2AddressSetv4, _ := buildNamespaceAddressSets(namespace2.Name, nil) - // add namespaces to initial Database - initialDB.NBData = append(initialDB.NBData, namespace1AddressSetv4, namespace2AddressSetv4) - - networkPolicy := getMatchLabelsNetworkPolicy(netPolicyName1, namespace1.Name, - namespace2.Name, "", true, 
true) - startOvn(initialDB, []corev1.Namespace{namespace1, namespace2}, []knet.NetworkPolicy{*networkPolicy}, - nil, nil) - - _, err := fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). - Get(context.TODO(), networkPolicy.Name, metav1.GetOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ginkgo.DescribeTable("reconciles an existing networkPolicy with empty db", + func(allowICMPNetworkPolicy bool) { + app.Action = func(*cli.Context) error { + config.OVNKubernetesFeature.AllowICMPNetworkPolicy = allowICMPNetworkPolicy + namespace1 := *newNamespace(namespaceName1) + namespace2 := *newNamespace(namespaceName2) + namespace1AddressSetv4, _ := buildNamespaceAddressSets(namespace1.Name, nil) + namespace2AddressSetv4, _ := buildNamespaceAddressSets(namespace2.Name, nil) + // add namespaces to initial Database + initialDB.NBData = append(initialDB.NBData, namespace1AddressSetv4, namespace2AddressSetv4) + + networkPolicy := getMatchLabelsNetworkPolicy(netPolicyName1, namespace1.Name, + namespace2.Name, "", true, true) + startOvn(initialDB, []corev1.Namespace{namespace1, namespace2}, []knet.NetworkPolicy{*networkPolicy}, + nil, nil) + + _, err := fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). 
+ Get(context.TODO(), networkPolicy.Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - expectedData := getNamespaceWithSinglePolicyExpectedData( - newNetpolDataParams(networkPolicy).withPeerNamespaces(namespace2.Name), - initialDB.NBData) - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedData)) - return nil - } + expectedData := getNamespaceWithSinglePolicyExpectedData( + newNetpolDataParams(networkPolicy).withPeerNamespaces(namespace2.Name), + initialDB.NBData) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedData)) + return nil + } - gomega.Expect(app.Run([]string{app.Name})).To(gomega.Succeed()) - }) + gomega.Expect(app.Run([]string{app.Name})).To(gomega.Succeed()) + }, + ginkgo.Entry("with allow ICMP network policy disabled", false), + ginkgo.Entry("with allow ICMP network policy enabled", true), + ) ginkgo.It("reconciles an ingress networkPolicy updating an existing ACL", func() { app.Action = func(*cli.Context) error { From 25a4e05621eaa2d77fd9867e54247fa17d6992e7 Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Fri, 20 Feb 2026 15:13:48 -0500 Subject: [PATCH 44/59] docs: Add section on how to debug coredumps from non-go binaries Assisted-by: opus (claude-opus-4-5-20251101) Signed-off-by: Ihar Hrachyshka --- docs/developer-guide/debugging.md | 58 +++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/docs/developer-guide/debugging.md b/docs/developer-guide/debugging.md index 5ca9ecd72f..108bac374d 100644 --- a/docs/developer-guide/debugging.md +++ b/docs/developer-guide/debugging.md @@ -72,6 +72,64 @@ Use the [Delve](https://github.com/go-delve/delve) debugger for post-mortem anal (dlv) print # Print variable value ``` +### Debugging C Binaries with GDB (e.g. FRR) + +Some coredumps come from C binaries such as FRR's `bgpd` or `zebra`, not from Go +binaries. These require GDB instead of Delve. 
+ +The key challenge is matching the exact container image that produced the coredump, +since GDB needs the same binary and shared libraries to resolve symbols. + +1. **Identify the image that produced the coredump.** Check the CI job logs for the + `docker run` command that started the crashed process. For example, the external + FRR container may use `quay.io/frrouting/frr:9.1.0` (deployed via + `contrib/kind-common.sh`). + +2. **Run the same image with the coredumps mounted:** + + ```bash + docker run --platform linux/amd64 -it \ + -v /path/to/coredumps:/coredumps \ + quay.io/frrouting/frr:9.1.0 sh + ``` + + Using `--platform linux/amd64` is important if the coredump was generated on + x86_64 and you are on a different architecture (e.g. Apple Silicon). + +3. **Install GDB and debug symbols inside the container:** + + ```bash + apk add gdb frr-dbg musl-dbg + ``` + + The exact package names depend on the base distro. Alpine uses `-dbg` suffix. + +4. **Run GDB:** + + ```bash + gdb /usr/lib/frr/bgpd /coredumps/core.38907.bgpd.ovn-control-plane.11 + ``` + +5. **Explore the crash:** + + ``` + (gdb) bt # Show backtrace + (gdb) thread apply all bt # Backtraces for all threads + (gdb) frame # Select stack frame + (gdb) info locals # Show local variables + (gdb) info args # Show function arguments + (gdb) print *some_ptr # Dereference and print a pointer + (gdb) info sharedlibrary # Check if all shared libraries are resolved + ``` + +6. **Troubleshooting missing symbols.** If the backtrace shows `??` for most frames: + - Run `info sharedlibrary` in GDB. Lines marked `(*)` are missing debug info. + - Verify you are using the exact same image tag that produced the coredump. + Floating tags (like `latest` or even `9.1.0`) may have been rebuilt with updated + packages. If the shared library versions don't match (GDB will print warnings + about missing `.so` files), you need the exact image digest from CI. 
+ - Install additional `-dbg` packages for libraries that appear in the backtrace. + ### Local Development To enable coredump collection in a local KIND cluster: From bcb7ec917c74fc97f2476eaea9b8c3189bc77c42 Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Tue, 27 Jan 2026 16:23:36 -0800 Subject: [PATCH 45/59] Allow emitting metrics on a single endpoint When --metrics-bind-address and --ovn-metrics-bind-address are same, emit both ovnkube and OVN/OVS metrics from a single endpoint. Signed-off-by: Lei Huang --- go-controller/cmd/ovnkube/ovnkube.go | 22 +++++++++++++++++++--- go-controller/pkg/metrics/metrics.go | 3 +-- go-controller/pkg/metrics/server.go | 16 ++-------------- go-controller/pkg/metrics/server_test.go | 6 ++---- 4 files changed, 24 insertions(+), 23 deletions(-) diff --git a/go-controller/cmd/ovnkube/ovnkube.go b/go-controller/cmd/ovnkube/ovnkube.go index e2b92b454a..4d7c008152 100644 --- a/go-controller/cmd/ovnkube/ovnkube.go +++ b/go-controller/cmd/ovnkube/ovnkube.go @@ -14,6 +14,7 @@ import ( "text/template" "time" + "github.com/prometheus/client_golang/prometheus" "github.com/urfave/cli/v2" "k8s.io/apimachinery/pkg/util/sets" @@ -261,6 +262,15 @@ func determineOvnkubeRunMode(ctx *cli.Context) (*ovnkubeRunMode, error) { return mode, nil } +// Determine if we should serve both ovnkube-node and OVN/OVS metrics on a single endpoint. +func combineMetricsEndpoints(runMode *ovnkubeRunMode) bool { + return runMode != nil && + runMode.node && + config.Metrics.BindAddress != "" && + config.Metrics.BindAddress == config.Metrics.OVNMetricsBindAddress && + config.OvnKubeNode.Mode != types.NodeModeDPUHost +} + func startOvnKube(ctx *cli.Context, cancel context.CancelFunc) error { pidfile := ctx.String("pidfile") if pidfile != "" { @@ -311,9 +321,9 @@ func startOvnKube(ctx *cli.Context, cancel context.CancelFunc) error { eventRecorder := util.EventRecorder(ovnClientset.KubeClient) - // Start metric server for master and node. 
Expose the metrics HTTP endpoint if configured. + // Start the general metrics server only when not combined. // Non LE master instances also are required to expose the metrics server. - if config.Metrics.BindAddress != "" { + if config.Metrics.BindAddress != "" && !combineMetricsEndpoints(runMode) { metrics.StartMetricsServer(config.Metrics.BindAddress, config.Metrics.EnablePprof, config.Metrics.NodeServerCert, config.Metrics.NodeServerPrivKey, ctx.Done(), ovnKubeStartWg) } @@ -603,7 +613,7 @@ func runOvnKube(ctx context.Context, runMode *ovnkubeRunMode, ovnClientset *util // start the prometheus server to serve OVS and OVN Metrics (default port: 9476) // Note: for ovnkube node mode dpu-host no metrics is required as ovs/ovn is not running on the node. - if config.OvnKubeNode.Mode != types.NodeModeDPUHost && config.Metrics.OVNMetricsBindAddress != "" { + if runMode.node && config.OvnKubeNode.Mode != types.NodeModeDPUHost && config.Metrics.OVNMetricsBindAddress != "" { if ovsClient == nil { ovsClient, err = libovsdb.NewOVSClient(ctx.Done()) @@ -623,6 +633,12 @@ func runOvnKube(ctx context.Context, runMode *ovnkubeRunMode, ovnClientset *util EnableOVNDBMetrics: true, } + if combineMetricsEndpoints(runMode) { + // Reuse the default registry (and its gatherer) so ovnkube-node metrics and OVN metrics share one endpoint. + opts.Registerer = prometheus.DefaultRegisterer + opts.EnablePprof = config.Metrics.EnablePprof + } + if !config.OVNKubernetesFeature.EnableInterconnect { // In Central mode, OVNKube Node doesn't need to register OVN Northd and DB metrics unless // OVNKube Master Pod is running on this node. 
diff --git a/go-controller/pkg/metrics/metrics.go b/go-controller/pkg/metrics/metrics.go index 704ae11955..e574a9c468 100644 --- a/go-controller/pkg/metrics/metrics.go +++ b/go-controller/pkg/metrics/metrics.go @@ -467,9 +467,8 @@ func StartMetricsServer(bindAddress string, enablePprof bool, certFile string, k CertFile: certFile, KeyFile: keyFile, EnablePprof: enablePprof, - // Use default registry/gatherer so existing metric registrations keep working. + // Use default registry so existing metric registrations keep working. Registerer: prometheus.DefaultRegisterer, - Gatherer: prometheus.DefaultGatherer, } server := NewMetricServer(opts, nil, nil) diff --git a/go-controller/pkg/metrics/server.go b/go-controller/pkg/metrics/server.go index 2641cb4ec7..ca5a23680a 100644 --- a/go-controller/pkg/metrics/server.go +++ b/go-controller/pkg/metrics/server.go @@ -44,7 +44,6 @@ type MetricServerOptions struct { // Prometheus plumbing Registerer prometheus.Registerer - Gatherer prometheus.Gatherer // Kubernetes integration K8sClient kubernetes.Interface @@ -69,9 +68,8 @@ type MetricServer struct { server *http.Server mux *http.ServeMux - // Prometheus registry / gatherer + // Prometheus registry registerer prometheus.Registerer - gatherer prometheus.Gatherer } // NewMetricServer creates a new MetricServer instance @@ -80,26 +78,16 @@ func NewMetricServer(opts MetricServerOptions, ovsDBClient libovsdbclient.Client if registerer == nil { registerer = prometheus.NewRegistry() } - gatherer := opts.Gatherer - if gatherer == nil { - if reg, ok := registerer.(prometheus.Gatherer); ok { - gatherer = reg - } else { - gatherer = prometheus.DefaultGatherer - } - } - // Create server instance server := &MetricServer{ opts: opts, ovsDBClient: ovsDBClient, registerer: registerer, - gatherer: gatherer, kubeClient: kubeClient, } server.mux = http.NewServeMux() - tg := prometheus.ToTransactionalGatherer(server.gatherer) + tg := 
prometheus.ToTransactionalGatherer(server.registerer.(prometheus.Gatherer)) metricsHandler := promhttp.HandlerForTransactional(tg, promhttp.HandlerOpts{}) server.mux.Handle("/metrics", promhttp.InstrumentMetricHandler( diff --git a/go-controller/pkg/metrics/server_test.go b/go-controller/pkg/metrics/server_test.go index 98e568b80f..c02bfd1188 100644 --- a/go-controller/pkg/metrics/server_test.go +++ b/go-controller/pkg/metrics/server_test.go @@ -50,7 +50,6 @@ func TestNewMetricServerRunAndShutdown(t *testing.T) { require.NotNil(t, server, "Server should not be nil") require.NotNil(t, server.mux, "Server mux should not be nil") require.NotNil(t, server.registerer, "Server registerer should not be nil") - require.NotNil(t, server.gatherer, "Server gatherer should not be nil") // Start server in background serverDone := make(chan struct{}) @@ -112,7 +111,6 @@ func TestNewMetricServerRunAndFailOnFatalError(t *testing.T) { require.NotNil(t, server, "Server should not be nil") require.NotNil(t, server.mux, "Server mux should not be nil") require.NotNil(t, server.registerer, "Server registerer should not be nil") - require.NotNil(t, server.gatherer, "Server gatherer should not be nil") // Start server in background serverDone := make(chan struct{}) @@ -873,8 +871,8 @@ func TestHandleMetrics(t *testing.T) { server := NewMetricServer(opts, ovsDBClient, kubeClient) server.registerMetrics() - // iterate s.ovnRegistry to list all registered metrics' names - regMetrics, err := server.gatherer.Gather() + // Iterate server registry to list all registered metric names. 
+ regMetrics, err := server.registerer.(prometheus.Gatherer).Gather() if err != nil { t.Fatalf("Failed to gather metrics: %v", err) } From 63468cc87c48703ea75b67b87a054197f03b30dd Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Tue, 13 May 2025 22:03:00 -0400 Subject: [PATCH 46/59] Adds kind.sh and helm support for --allow-icmp-network-policy Signed-off-by: Tim Rozet --- contrib/kind-common.sh | 1 + contrib/kind-helm.sh | 5 +++ contrib/kind.sh | 7 ++++- dist/images/daemonset.sh | 15 +++++++++ dist/images/ovnkube.sh | 31 +++++++++++++++++++ dist/templates/ovnkube-control-plane.yaml.j2 | 2 ++ dist/templates/ovnkube-master.yaml.j2 | 2 ++ .../ovnkube-single-node-zone.yaml.j2 | 2 ++ .../templates/ovnkube-zone-controller.yaml.j2 | 2 ++ .../templates/ovnkube-control-plane.yaml | 2 ++ .../templates/deployment-ovnkube-master.yaml | 2 ++ .../templates/ovnkube-single-node-zone.yaml | 2 ++ .../templates/ovnkube-zone-controller.yaml | 2 ++ .../values-multi-node-zone.yaml | 2 ++ helm/ovn-kubernetes/values-no-ic.yaml | 2 ++ .../values-single-node-zone.yaml | 2 ++ 16 files changed, 80 insertions(+), 1 deletion(-) diff --git a/contrib/kind-common.sh b/contrib/kind-common.sh index 12ed53d7c2..763c63e3f0 100644 --- a/contrib/kind-common.sh +++ b/contrib/kind-common.sh @@ -103,6 +103,7 @@ set_common_default_params() { OVN_ENABLE_DNSNAMERESOLVER=${OVN_ENABLE_DNSNAMERESOLVER:-false} ENABLE_COREDUMPS=${ENABLE_COREDUMPS:-false} METRICS_IP=${METRICS_IP:-""} + OVN_ALLOW_ICMP_NETPOL=${OVN_ALLOW_ICMP_NETPOL:-false} OVN_COMPACT_MODE=${OVN_COMPACT_MODE:-false} if [ "$OVN_COMPACT_MODE" == true ]; then KIND_NUM_WORKER=0 diff --git a/contrib/kind-helm.sh b/contrib/kind-helm.sh index 85764d6d91..285f8e6c0c 100755 --- a/contrib/kind-helm.sh +++ b/contrib/kind-helm.sh @@ -96,6 +96,7 @@ usage() { echo "-ce | --enable-central [DEPRECATED] Deploy with OVN Central (Legacy Architecture)" echo "-npz | --nodes-per-zone Specify number of nodes per zone (Default 0, which means global zone; >0 means 
interconnect zone, where 1 for single-node zone, >1 for multi-node zone). If this value > 1, then (total k8s nodes (workers + 1) / num of nodes per zone) should be zero." echo "-mps | --multi-pod-subnet Use multiple subnets for the default cluster network" + echo "--allow-icmp-netpol Allows ICMP and ICMPv6 traffic globally, regardless of network policy rules" echo "" } @@ -196,6 +197,8 @@ parse_args() { OVN_ENABLE_INTERCONNECT=false CENTRAL_ARG_PROVIDED=true ;; + --allow-icmp-netpol ) OVN_ALLOW_ICMP_NETPOL=true + ;; -ic | --enable-interconnect ) OVN_ENABLE_INTERCONNECT=true IC_ARG_PROVIDED=true ;; @@ -264,6 +267,7 @@ print_params() { echo "KIND_NUM_WORKER = $KIND_NUM_WORKER" echo "OVN_ENABLE_DNSNAMERESOLVER= $OVN_ENABLE_DNSNAMERESOLVER" echo "MULTI_POD_SUBNET= $MULTI_POD_SUBNET" + echo "OVN_ALLOW_ICMP_NETPOL= $OVN_ALLOW_ICMP_NETPOL" echo "OVN_ENABLE_INTERCONNECT = $OVN_ENABLE_INTERCONNECT" echo "DYNAMIC_UDN_ALLOCATION = $DYNAMIC_UDN_ALLOCATION" echo "DYNAMIC_UDN_GRACE_PERIOD = $DYNAMIC_UDN_GRACE_PERIOD" @@ -371,6 +375,7 @@ helm install ovn-kubernetes . -f "${value_file}" \ --set global.enableNetworkQos=$(if [ "${OVN_NETWORK_QOS_ENABLE}" == "true" ]; then echo "true"; else echo "false"; fi) \ --set global.enableNoOverlay=$(if [ "${ENABLE_NO_OVERLAY}" == "true" ]; then echo "true"; else echo "false"; fi) \ --set global.enableCoredumps=$(if [ "${ENABLE_COREDUMPS}" == "true" ]; then echo "true"; else echo "false"; fi) \ + --set global.allowICMPNetworkPolicy=$(if [ "${OVN_ALLOW_ICMP_NETPOL}" == "true" ]; then echo "true"; else echo "false"; fi) \ ${ovnkube_db_options} EOF ) diff --git a/contrib/kind.sh b/contrib/kind.sh index ea8ce6fc7c..a68b922d95 100755 --- a/contrib/kind.sh +++ b/contrib/kind.sh @@ -126,6 +126,7 @@ echo "-adv | --advertise-default-network Applies a RouteAdvertisement echo "-rud | --routed-udn-isolation-disable Disable isolation across BGP-advertised UDNs (sets advertised-udn-isolation-mode=loose). DEFAULT: strict." 
echo "-mps | --multi-pod-subnet Use multiple subnets for the default cluster network" echo "-noe | --no-overlay-enable Enable no overlay" +echo "--allow-icmp-netpol Allows ICMP and ICMPv6 traffic globally, regardless of network policy rules" echo "" } @@ -377,6 +378,8 @@ parse_args() { ;; -mps| --multi-pod-subnet ) MULTI_POD_SUBNET=true ;; + --allow-icmp-netpol ) OVN_ALLOW_ICMP_NETPOL=true + ;; -h | --help ) usage exit ;; @@ -481,6 +484,7 @@ print_params() { echo "OVN_MTU= $OVN_MTU" echo "OVN_ENABLE_DNSNAMERESOLVER= $OVN_ENABLE_DNSNAMERESOLVER" echo "MULTI_POD_SUBNET= $MULTI_POD_SUBNET" + echo "OVN_ALLOW_ICMP_NETPOL= $OVN_ALLOW_ICMP_NETPOL" echo "" } @@ -761,7 +765,8 @@ create_ovn_kube_manifests() { --network-qos-enable="${OVN_NETWORK_QOS_ENABLE}" \ --mtu="${OVN_MTU}" \ --enable-dnsnameresolver="${OVN_ENABLE_DNSNAMERESOLVER}" \ - --enable-observ="${OVN_OBSERV_ENABLE}" + --enable-observ="${OVN_OBSERV_ENABLE}" \ + --allow-icmp-netpol="${OVN_ALLOW_ICMP_NETPOL}" popd } diff --git a/dist/images/daemonset.sh b/dist/images/daemonset.sh index 4430d29143..15ad3b1e25 100755 --- a/dist/images/daemonset.sh +++ b/dist/images/daemonset.sh @@ -106,6 +106,8 @@ OVN_NETWORK_QOS_ENABLE= OVN_ENABLE_DNSNAMERESOLVER="false" OVN_NOHOSTSUBNET_LABEL="" OVN_DISABLE_REQUESTEDCHASSIS="false" +OVN_ALLOW_ICMP_NETPOL="false" + # IN_UPGRADE is true only if called by upgrade-ovn.sh during the upgrade test, # it will render only the parts in ovn-setup.yaml related to RBAC permissions. 
IN_UPGRADE= @@ -402,6 +404,9 @@ while [ "$1" != "" ]; do --enable-dnsnameresolver) OVN_ENABLE_DNSNAMERESOLVER=$VALUE ;; + --allow-icmp-netpol) + OVN_ALLOW_ICMP_NETPOL=$VALUE + ;; --enable-observ) OVN_OBSERV_ENABLE=$VALUE ;; @@ -653,6 +658,9 @@ echo "ovn_network_qos_enable: ${ovn_network_qos_enable}" ovn_enable_dnsnameresolver=${OVN_ENABLE_DNSNAMERESOLVER} echo "ovn_enable_dnsnameresolver: ${ovn_enable_dnsnameresolver}" +ovn_allow_icmp_netpol=${OVN_ALLOW_ICMP_NETPOL} +echo "ovn_allow_icmp_netpol: ${ovn_allow_icmp_netpol}" + ovn_observ_enable=${OVN_OBSERV_ENABLE} echo "ovn_observ_enable: ${ovn_observ_enable}" @@ -892,6 +900,7 @@ ovn_image=${ovnkube_image} \ ovn_enable_persistent_ips=${ovn_enable_persistent_ips} \ ovn_enable_svc_template_support=${ovn_enable_svc_template_support} \ ovn_enable_dnsnameresolver=${ovn_enable_dnsnameresolver} \ + ovn_allow_icmp_netpol=${ovn_allow_icmp_netpol} \ ovn_observ_enable=${ovn_observ_enable} \ ovn_nohostsubnet_label=${ovn_nohostsubnet_label} \ ovn_disable_requestedchassis=${ovn_disable_requestedchassis} \ @@ -948,6 +957,7 @@ ovn_image=${ovnkube_image} \ ovn_v6_transit_subnet=${ovn_v6_transit_subnet} \ ovn_enable_persistent_ips=${ovn_enable_persistent_ips} \ ovn_enable_dnsnameresolver=${ovn_enable_dnsnameresolver} \ + ovn_allow_icmp_netpol=${ovn_allow_icmp_netpol} \ ovn_observ_enable=${ovn_observ_enable} \ enable_coredumps=${enable_coredumps} \ metrics_ip=${metrics_ip} \ @@ -1056,6 +1066,7 @@ ovn_image=${ovnkube_image} \ ovn_enable_persistent_ips=${ovn_enable_persistent_ips} \ ovn_enable_svc_template_support=${ovn_enable_svc_template_support} \ ovn_enable_dnsnameresolver=${ovn_enable_dnsnameresolver} \ + ovn_allow_icmp_netpol=${ovn_allow_icmp_netpol} \ ovn_observ_enable=${ovn_observ_enable} \ enable_coredumps=${enable_coredumps} \ jinjanate ../templates/ovnkube-single-node-zone.yaml.j2 -o ${output_dir}/ovnkube-single-node-zone.yaml @@ -1226,6 +1237,7 @@ ovn_image=${ovnkube_image} \ 
ovn_enable_persistent_ips=${ovn_enable_persistent_ips} \ ovn_enable_svc_template_support=${ovn_enable_svc_template_support} \ ovn_enable_dnsnameresolver=${ovn_enable_dnsnameresolver} \ + ovn_allow_icmp_netpol=${ovn_allow_icmp_netpol} \ ovn_observ_enable=${ovn_observ_enable} \ enable_coredumps=${enable_coredumps} \ metrics_ip=${metrics_ip} \ @@ -1292,11 +1304,13 @@ net_cidr=${net_cidr} svc_cidr=${svc_cidr} \ ovn_enable_interconnect=${ovn_enable_interconnect} \ ovn_enable_ovnkube_identity=${ovn_enable_ovnkube_identity} \ ovn_enable_dnsnameresolver=${ovn_enable_dnsnameresolver} \ +ovn_allow_icmp_netpol=${ovn_allow_icmp_netpol} \ jinjanate ../templates/rbac-ovnkube-node.yaml.j2 -o ${output_dir}/rbac-ovnkube-node.yaml ovn_network_segmentation_enable=${ovn_network_segmentation_enable} \ ovn_pre_conf_udn_addr_enable=${ovn_pre_conf_udn_addr_enable} \ ovn_enable_dnsnameresolver=${ovn_enable_dnsnameresolver} \ +ovn_allow_icmp_netpol=${ovn_allow_icmp_netpol} \ ovn_route_advertisements_enable=${ovn_route_advertisements_enable} \ ovn_evpn_enable=${ovn_evpn_enable} \ ovn_advertised_udn_isolation_mode=${ovn_advertised_udn_isolation_mode} \ @@ -1304,6 +1318,7 @@ ovn_advertised_udn_isolation_mode=${ovn_advertised_udn_isolation_mode} \ ovn_network_segmentation_enable=${ovn_network_segmentation_enable} \ ovn_enable_dnsnameresolver=${ovn_enable_dnsnameresolver} \ +ovn_allow_icmp_netpol=${ovn_allow_icmp_netpol} \ ovn_route_advertisements_enable=${ovn_route_advertisements_enable} \ ovn_pre_conf_udn_addr_enable=${ovn_pre_conf_udn_addr_enable} \ ovn_advertised_udn_isolation_mode=${ovn_advertised_udn_isolation_mode} \ diff --git a/dist/images/ovnkube.sh b/dist/images/ovnkube.sh index 8f01b2f458..50751aa3da 100755 --- a/dist/images/ovnkube.sh +++ b/dist/images/ovnkube.sh @@ -98,6 +98,7 @@ fi # OVN_NORTHD_BACKOFF_INTERVAL - ovn northd backoff interval in ms (default 300) # OVN_ENABLE_SVC_TEMPLATE_SUPPORT - enable svc template support # OVN_ENABLE_DNSNAMERESOLVER - enable dns name resolver 
support +# OVN_ALLOW_ICMP_NETPOL - allow ICMP and ICMPv6 regardless of network policy # OVN_OBSERV_ENABLE - enable observability for ovnkube # The argument to the command is the operation to be performed @@ -328,6 +329,8 @@ ovn_enable_svc_template_support=${OVN_ENABLE_SVC_TEMPLATE_SUPPORT:-true} ovn_network_qos_enable=${OVN_NETWORK_QOS_ENABLE:-false} # OVN_ENABLE_DNSNAMERESOLVER - enable dns name resolver support ovn_enable_dnsnameresolver=${OVN_ENABLE_DNSNAMERESOLVER:-false} +# OVN_ALLOW_ICMP_NETPOL - allow ICMP/ICMPv6 with network policy +ovn_allow_icmp_netpol=${OVN_ALLOW_ICMP_NETPOL:-false} # OVN_OBSERV_ENABLE - enable observability for ovnkube ovn_observ_enable=${OVN_OBSERV_ENABLE:-false} # OVN_NOHOSTSUBNET_LABEL - node label indicating nodes managing their own network @@ -1501,6 +1504,12 @@ ovn-master() { fi echo "ovn_enable_dnsnameresolver_flag=${ovn_enable_dnsnameresolver_flag}" + ovn_allow_icmp_netpol_flag= + if [[ ${ovn_allow_icmp_netpol} == "true" ]]; then + ovn_allow_icmp_netpol_flag="--allow-icmp-network-policy" + fi + echo "ovn_allow_icmp_netpol_flag=${ovn_allow_icmp_netpol_flag}" + /usr/bin/ovnkube --init-master ${K8S_NODE} \ ${anp_enabled_flag} \ ${disable_forwarding_flag} \ @@ -1537,6 +1546,7 @@ ovn-master() { ${persistent_ips_enabled_flag} \ ${network_qos_enabled_flag} \ ${ovn_enable_dnsnameresolver_flag} \ + ${ovn_allow_icmp_netpol_flag} \ ${nohostsubnet_label_option} \ ${ovn_stateless_netpol_enable_flag} \ ${ovn_disable_requestedchassis_flag} \ @@ -1844,6 +1854,12 @@ ovnkube-controller() { fi echo "ovn_enable_dnsnameresolver_flag=${ovn_enable_dnsnameresolver_flag}" + ovn_allow_icmp_netpol_flag= + if [[ ${ovn_allow_icmp_netpol} == "true" ]]; then + ovn_allow_icmp_netpol_flag="--allow-icmp-network-policy" + fi + echo "ovn_allow_icmp_netpol_flag=${ovn_allow_icmp_netpol_flag}" + ovn_observ_enable_flag= if [[ ${ovn_observ_enable} == "true" ]]; then ovn_observ_enable_flag="--enable-observability" @@ -1898,6 +1914,7 @@ ovnkube-controller() { 
${ovn_enable_dnsnameresolver_flag} \ ${dynamic_udn_allocation_flag} \ ${dynamic_udn_grace_period} \ + ${ovn_allow_icmp_netpol_flag} \ --cluster-subnets ${net_cidr} --k8s-service-cidr=${svc_cidr} \ --gateway-mode=${ovn_gateway_mode} \ --host-network-namespace ${ovn_host_network_namespace} \ @@ -2334,6 +2351,12 @@ ovnkube-controller-with-node() { fi echo "ovn_enable_dnsnameresolver_flag=${ovn_enable_dnsnameresolver_flag}" + ovn_allow_icmp_netpol_flag= + if [[ ${ovn_allow_icmp_netpol} == "true" ]]; then + ovn_allow_icmp_netpol_flag="--allow-icmp-network-policy" + fi + echo "ovn_allow_icmp_netpol_flag=${ovn_allow_icmp_netpol_flag}" + ovn_observ_enable_flag= if [[ ${ovn_observ_enable} == "true" ]]; then ovn_observ_enable_flag="--enable-observability" @@ -2433,6 +2456,7 @@ ovnkube-controller-with-node() { ${ovn_enable_dnsnameresolver_flag} \ ${ovn_disable_requestedchassis_flag} \ ${cluster_access_opts} \ + ${ovn_allow_icmp_netpol_flag} \ --cluster-subnets ${net_cidr} --k8s-service-cidr=${svc_cidr} \ --export-ovs-metrics \ --gateway-mode=${ovn_gateway_mode} ${ovn_gateway_opts} \ @@ -2664,6 +2688,12 @@ ovn-cluster-manager() { fi echo "dynamic_udn_grace_period=${dynamic_udn_grace_period}" + ovn_allow_icmp_netpol_flag= + if [[ ${ovn_allow_icmp_netpol} == "true" ]]; then + ovn_allow_icmp_netpol_flag="--allow-icmp-network-policy" + fi + echo "ovn_allow_icmp_netpol_flag=${ovn_allow_icmp_netpol_flag}" + echo "=============== ovn-cluster-manager ========== MASTER ONLY" /usr/bin/ovnkube --init-cluster-manager ${K8S_NODE} \ ${anp_enabled_flag} \ @@ -2698,6 +2728,7 @@ ovn-cluster-manager() { ${dynamic_udn_allocation_flag} \ ${dynamic_udn_grace_period} \ ${ovn_enable_dnsnameresolver_flag} \ + ${ovn_allow_icmp_netpol_flag} \ --gateway-mode=${ovn_gateway_mode} \ --cluster-subnets ${net_cidr} --k8s-service-cidr=${svc_cidr} \ --host-network-namespace ${ovn_host_network_namespace} \ diff --git a/dist/templates/ovnkube-control-plane.yaml.j2 b/dist/templates/ovnkube-control-plane.yaml.j2 
index ed690d58d1..c40897392f 100644 --- a/dist/templates/ovnkube-control-plane.yaml.j2 +++ b/dist/templates/ovnkube-control-plane.yaml.j2 @@ -201,6 +201,8 @@ spec: value: "{{ ovn_network_qos_enable }}" - name: OVN_ENABLE_DNSNAMERESOLVER value: "{{ ovn_enable_dnsnameresolver }}" + - name: OVN_ALLOW_ICMP_NETPOL + value: "{{ ovn_allow_icmp_netpol }}" # end of container volumes: diff --git a/dist/templates/ovnkube-master.yaml.j2 b/dist/templates/ovnkube-master.yaml.j2 index b4fee83afa..84cd608239 100644 --- a/dist/templates/ovnkube-master.yaml.j2 +++ b/dist/templates/ovnkube-master.yaml.j2 @@ -331,6 +331,8 @@ spec: value: "{{ ovn_network_qos_enable }}" - name: OVN_ENABLE_DNSNAMERESOLVER value: "{{ ovn_enable_dnsnameresolver }}" + - name: OVN_ALLOW_ICMP_NETPOL + value: "{{ ovn_allow_icmp_netpol }}" # end of container volumes: diff --git a/dist/templates/ovnkube-single-node-zone.yaml.j2 b/dist/templates/ovnkube-single-node-zone.yaml.j2 index 258b448e16..3936d90e3a 100644 --- a/dist/templates/ovnkube-single-node-zone.yaml.j2 +++ b/dist/templates/ovnkube-single-node-zone.yaml.j2 @@ -498,6 +498,8 @@ spec: value: "{{ ovn_network_qos_enable }}" - name: OVN_ENABLE_DNSNAMERESOLVER value: "{{ ovn_enable_dnsnameresolver }}" + - name: OVN_ALLOW_ICMP_NETPOL + value: "{{ ovn_allow_icmp_netpol }}" readinessProbe: exec: diff --git a/dist/templates/ovnkube-zone-controller.yaml.j2 b/dist/templates/ovnkube-zone-controller.yaml.j2 index c984eab781..2ab0293729 100644 --- a/dist/templates/ovnkube-zone-controller.yaml.j2 +++ b/dist/templates/ovnkube-zone-controller.yaml.j2 @@ -419,6 +419,8 @@ spec: value: "local" - name: OVN_ENABLE_DNSNAMERESOLVER value: "{{ ovn_enable_dnsnameresolver }}" + - name: OVN_ALLOW_ICMP_NETPOL + value: "{{ ovn_allow_icmp_netpol }}" - name: OVN_OBSERV_ENABLE value: "{{ ovn_observ_enable }}" # end of container diff --git a/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/ovnkube-control-plane.yaml 
b/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/ovnkube-control-plane.yaml index 5698797434..90efbecad1 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/ovnkube-control-plane.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/ovnkube-control-plane.yaml @@ -186,6 +186,8 @@ spec: value: {{ hasKey .Values.global "enablePersistentIPs" | ternary .Values.global.enablePersistentIPs false | quote }} - name: OVN_ENABLE_DNSNAMERESOLVER value: {{ hasKey .Values.global "enableDNSNameResolver" | ternary .Values.global.enableDNSNameResolver false | quote }} + - name: OVN_ALLOW_ICMP_NETPOL + value: {{ hasKey .Values.global "allowICMPNetworkPolicy" | ternary .Values.global.allowICMPNetworkPolicy false | quote }} # end of container volumes: # TODO: Need to check why we need this? diff --git a/helm/ovn-kubernetes/charts/ovnkube-master/templates/deployment-ovnkube-master.yaml b/helm/ovn-kubernetes/charts/ovnkube-master/templates/deployment-ovnkube-master.yaml index df2a7a1d0f..d87d1878fd 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-master/templates/deployment-ovnkube-master.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-master/templates/deployment-ovnkube-master.yaml @@ -307,6 +307,8 @@ spec: value: {{ hasKey .Values.global "enablePersistentIPs" | ternary .Values.global.enablePersistentIPs false | quote }} - name: OVN_ENABLE_DNSNAMERESOLVER value: {{ hasKey .Values.global "enableDNSNameResolver" | ternary .Values.global.enableDNSNameResolver false | quote }} + - name: OVN_ALLOW_ICMP_NETPOL + value: {{ hasKey .Values.global "allowICMPNetworkPolicy" | ternary .Values.global.allowICMPNetworkPolicy false | quote }} - name: OVN_DISABLE_REQUESTEDCHASSIS value: {{ default "false" .Values.global.disableRequestedchassis | quote }} # end of container diff --git a/helm/ovn-kubernetes/charts/ovnkube-single-node-zone/templates/ovnkube-single-node-zone.yaml 
b/helm/ovn-kubernetes/charts/ovnkube-single-node-zone/templates/ovnkube-single-node-zone.yaml index c2503c0d1d..19ffdf112d 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-single-node-zone/templates/ovnkube-single-node-zone.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-single-node-zone/templates/ovnkube-single-node-zone.yaml @@ -472,6 +472,8 @@ spec: value: {{ hasKey .Values.global "enableSvcTemplate" | ternary .Values.global.enableSvcTemplate true | quote }} - name: OVN_ENABLE_DNSNAMERESOLVER value: {{ hasKey .Values.global "enableDNSNameResolver" | ternary .Values.global.enableDNSNameResolver false | quote }} + - name: OVN_ALLOW_ICMP_NETPOL + value: {{ hasKey .Values.global "allowICMPNetworkPolicy" | ternary .Values.global.allowICMPNetworkPolicy false | quote }} - name: OVN_OBSERV_ENABLE value: {{ hasKey .Values.global "enableObservability" | ternary .Values.global.enableObservability false | quote }} - name: OVN_NETWORK_QOS_ENABLE diff --git a/helm/ovn-kubernetes/charts/ovnkube-zone-controller/templates/ovnkube-zone-controller.yaml b/helm/ovn-kubernetes/charts/ovnkube-zone-controller/templates/ovnkube-zone-controller.yaml index bd03a2518c..e26b24a64b 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-zone-controller/templates/ovnkube-zone-controller.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-zone-controller/templates/ovnkube-zone-controller.yaml @@ -388,6 +388,8 @@ spec: value: "local" - name: OVN_ENABLE_DNSNAMERESOLVER value: {{ hasKey .Values.global "enableDNSNameResolver" | ternary .Values.global.enableDNSNameResolver false | quote }} + - name: OVN_ALLOW_ICMP_NETPOL + value: {{ hasKey .Values.global "allowICMPNetworkPolicy" | ternary .Values.global.allowICMPNetworkPolicy false | quote }} - name: OVN_OBSERV_ENABLE value: {{ hasKey .Values.global "enableObservability" | ternary .Values.global.enableObservability false | quote }} # end of container diff --git a/helm/ovn-kubernetes/values-multi-node-zone.yaml b/helm/ovn-kubernetes/values-multi-node-zone.yaml index 
d3b1c16755..ae73d2827a 100644 --- a/helm/ovn-kubernetes/values-multi-node-zone.yaml +++ b/helm/ovn-kubernetes/values-multi-node-zone.yaml @@ -116,6 +116,8 @@ global: lFlowCacheLimitKb: "" # -- Configure to use DNSNameResolver feature with ovn-kubernetes enableDNSNameResolver: false + # -- Configure to allow ICMP and ICMPv6 traffic to bypass NetworkPolicy deny rules + allowICMPNetworkPolicy: false # -- Whether to disable SNAT of egress traffic in namespaces annotated with routing-external-gws disableSnatMultipleGws: "" # -- Controls if forwarding is allowed on OVNK controlled interfaces diff --git a/helm/ovn-kubernetes/values-no-ic.yaml b/helm/ovn-kubernetes/values-no-ic.yaml index 1ed4e30a2d..f366632023 100644 --- a/helm/ovn-kubernetes/values-no-ic.yaml +++ b/helm/ovn-kubernetes/values-no-ic.yaml @@ -104,6 +104,8 @@ global: enableLFlowCache: true # -- Configure to use DNSNameResolver feature with ovn-kubernetes enableDNSNameResolver: false + # -- Configure to allow ICMP and ICMPv6 traffic to bypass NetworkPolicy deny rules + allowICMPNetworkPolicy: false # -- Maximum number of logical flow cache entries ovn-controller may create when the logical flow cache is enabled # @default -- unlimited lFlowCacheLimit: "" diff --git a/helm/ovn-kubernetes/values-single-node-zone.yaml b/helm/ovn-kubernetes/values-single-node-zone.yaml index b9f4f2caf2..5b2dcf6976 100644 --- a/helm/ovn-kubernetes/values-single-node-zone.yaml +++ b/helm/ovn-kubernetes/values-single-node-zone.yaml @@ -118,6 +118,8 @@ global: enablePersistentIPs: true # -- Configure to use DNSNameResolver feature with ovn-kubernetes enableDNSNameResolver: false + # -- Configure to allow ICMP and ICMPv6 traffic to bypass NetworkPolicy deny rules + allowICMPNetworkPolicy: false # -- Whether to disable SNAT of egress traffic in namespaces annotated with routing-external-gws disableSnatMultipleGws: "" # -- Controls if forwarding is allowed on OVNK controlled interfaces From 69afd47b66cdd972f15cfbf8835f891b723a13a1 Mon 
Sep 17 00:00:00 2001 From: Tim Rozet Date: Fri, 30 Jan 2026 16:42:56 -0500 Subject: [PATCH 47/59] Adds E2E testing for ICMP NP bypass Signed-off-by: Tim Rozet --- test/e2e/network_policy_icmp.go | 71 ++++++++++++++ test/e2e/network_segmentation_policy.go | 117 ++++++++++++++++++++++++ test/e2e/util.go | 6 ++ 3 files changed, 194 insertions(+) create mode 100644 test/e2e/network_policy_icmp.go diff --git a/test/e2e/network_policy_icmp.go b/test/e2e/network_policy_icmp.go new file mode 100644 index 0000000000..510394a2fe --- /dev/null +++ b/test/e2e/network_policy_icmp.go @@ -0,0 +1,71 @@ +package e2e + +import ( + "context" + "time" + + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + "github.com/ovn-org/ovn-kubernetes/test/e2e/feature" + + "k8s.io/kubernetes/test/e2e/framework" + e2enode "k8s.io/kubernetes/test/e2e/framework/node" +) + +var _ = ginkgo.Describe("Network Policy: ICMP bypass", feature.NetworkPolicy, func() { + f := wrappedTestFramework("network-policy-icmp") + + ginkgo.BeforeEach(func() { + if !isICMPNetworkPolicyBypassEnabled() { + ginkgo.Skip("Allow ICMP bypass with NetworkPolicy is not enabled, skipping ICMP bypass network policy tests") + } + }) + + ginkgo.It("allows ICMP between pods with default deny policy on the default network", func() { + namespace := f.Namespace.Name + + ginkgo.By("creating a \"default deny\" network policy") + _, err := makeDenyAllPolicy(f, namespace, "deny-all") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("creating server and client pods") + serverPodName := "icmp-server" + clientPodName := "icmp-client" + serverCmd := []string{"/bin/bash", "-c", "/agnhost netexec --http-port 8000"} + clientCmd := []string{"/agnhost", "pause"} + + nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), f.ClientSet, 2) + framework.ExpectNoError(err, "") + if len(nodes.Items) < 2 { + ginkgo.Skip("requires at least 2 Nodes") + } + serverNode := nodes.Items[0].Name + clientNode := nodes.Items[1].Name 
+ + serverPod, err := createGenericPod(f, serverPodName, serverNode, namespace, serverCmd) + framework.ExpectNoError(err, "failed to create server pod") + _, err = createGenericPod(f, clientPodName, clientNode, namespace, clientCmd) + framework.ExpectNoError(err, "failed to create client pod") + + clientConfig := podConfiguration{name: clientPodName, namespace: namespace} + serverConfig := podConfiguration{name: serverPodName, namespace: namespace} + + ginkgo.By("verifying TCP is denied by the default deny policy") + gomega.Eventually(func() error { + return pokePod(f, clientPodName, serverPod.Status.PodIP) + }, 1*time.Minute, 6*time.Second).ShouldNot(gomega.Succeed()) + gomega.Consistently(func() error { + return pokePod(f, clientPodName, serverPod.Status.PodIP) + }, 15*time.Second, 5*time.Second).ShouldNot(gomega.Succeed()) + + ginkgo.By("verifying ICMP is allowed between pods") + serverIPs, err := podIPsFromStatus(f.ClientSet, namespace, serverPodName) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + for _, serverIP := range serverIPs { + gomega.Eventually(func() error { + return pingServerPodFromClient(f.ClientSet, serverConfig, clientConfig, serverIP) + }, 1*time.Minute, 6*time.Second).Should(gomega.Succeed()) + } + }) +}) diff --git a/test/e2e/network_segmentation_policy.go b/test/e2e/network_segmentation_policy.go index 44f47598b9..0fc8216c59 100644 --- a/test/e2e/network_segmentation_policy.go +++ b/test/e2e/network_segmentation_policy.go @@ -207,6 +207,123 @@ var _ = ginkgo.Describe("Network Segmentation: Network Policies", feature.Networ ), ) + ginkgo.DescribeTable( + "ICMP should bypass default deny policy for UDNs when enabled", + func( + netConfigParams networkAttachmentConfigParams, + clientPodConfig podConfiguration, + serverPodConfig podConfiguration, + ) { + if !isICMPNetworkPolicyBypassEnabled() { + ginkgo.Skip("ICMP Network Policy bypass is not enabled, skipping ICMP bypass network policy tests") + } + + ginkgo.By("Creating the attachment 
configuration") + netConfig := newNetworkAttachmentConfig(netConfigParams) + netConfig.namespace = f.Namespace.Name + netConfig.cidr = filterCIDRsAndJoin(cs, netConfig.cidr) + _, err := nadClient.NetworkAttachmentDefinitions(f.Namespace.Name).Create( + context.Background(), + generateNAD(netConfig, f.ClientSet), + metav1.CreateOptions{}, + ) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("creating client/server pods") + serverPodConfig.namespace = f.Namespace.Name + clientPodConfig.namespace = f.Namespace.Name + nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), cs, 2) + framework.ExpectNoError(err, "") + if len(nodes.Items) < 2 { + ginkgo.Skip("requires at least 2 Nodes") + } + serverPodConfig.nodeSelector = map[string]string{nodeHostnameKey: nodes.Items[0].GetName()} + clientPodConfig.nodeSelector = map[string]string{nodeHostnameKey: nodes.Items[1].GetName()} + runUDNPod(cs, f.Namespace.Name, serverPodConfig, nil) + runUDNPod(cs, f.Namespace.Name, clientPodConfig, nil) + + ginkgo.By("creating a \"default deny\" network policy") + _, err = makeDenyAllPolicy(f, f.Namespace.Name, "deny-all") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + var serverIPs []string + for i, cidr := range strings.Split(netConfig.cidr, ",") { + if cidr == "" { + continue + } + serverIP, err := getPodAnnotationIPsForAttachmentByIndex( + cs, + f.Namespace.Name, + serverPodConfig.name, + namespacedName(f.Namespace.Name, netConfig.name), + i, + ) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + serverIPs = append(serverIPs, serverIP) + } + gomega.Expect(serverIPs).NotTo(gomega.BeEmpty()) + + ginkgo.By("asserting the *client* pod can ping the server pod despite the default deny policy") + for _, serverIP := range serverIPs { + gomega.Eventually(func() error { + return pingServerPodFromClient(cs, serverPodConfig, clientPodConfig, serverIP) + }, 1*time.Minute, 6*time.Second).Should(gomega.Succeed()) + } + + ginkgo.By("asserting the *client* pod can 
not reach the server pod HTTP endpoint due to default deny policy") + for _, serverIP := range serverIPs { + gomega.Eventually(func() error { + return reachServerPodFromClient(cs, serverPodConfig, clientPodConfig, serverIP, port) + }, 1*time.Minute, 6*time.Second).ShouldNot(gomega.Succeed()) + gomega.Consistently(func() error { + return reachServerPodFromClient(cs, serverPodConfig, clientPodConfig, serverIP, port) + }, 15*time.Second, 5*time.Second).ShouldNot(gomega.Succeed()) + } + }, + ginkgo.Entry( + "in L2 dualstack primary UDN", + networkAttachmentConfigParams{ + name: nadName, + topology: "layer2", + cidr: joinStrings(userDefinedNetworkIPv4Subnet, userDefinedNetworkIPv6Subnet), + role: "primary", + }, + *podConfig( + "client-pod", + withCommand(func() []string { + return []string{"/agnhost", "pause"} + }), + ), + *podConfig( + "server-pod", + withCommand(func() []string { + return httpServerContainerCmd(port) + }), + ), + ), + ginkgo.Entry( + "in L3 dualstack primary UDN", + networkAttachmentConfigParams{ + name: nadName, + topology: "layer3", + cidr: joinStrings(userDefinedNetworkIPv4Subnet, userDefinedNetworkIPv6Subnet), + role: "primary", + }, + *podConfig( + "client-pod", + withCommand(func() []string { + return []string{"/agnhost", "pause"} + }), + ), + *podConfig( + "server-pod", + withCommand(func() []string { + return httpServerContainerCmd(port) + }), + ), + ), + ) + ginkgo.DescribeTable( "allow ingress traffic to one pod from a particular namespace", func( diff --git a/test/e2e/util.go b/test/e2e/util.go index 5a9715d4a8..df01c07b24 100644 --- a/test/e2e/util.go +++ b/test/e2e/util.go @@ -1440,6 +1440,12 @@ func isNetworkSegmentationEnabled() bool { return present && val == "true" } +func isICMPNetworkPolicyBypassEnabled() bool { + ovnKubeNamespace := deploymentconfig.Get().OVNKubernetesNamespace() + val := getTemplateContainerEnv(ovnKubeNamespace, "daemonset/ovnkube-node", getNodeContainerName(), "OVN_ALLOW_ICMP_NETPOL") + return val == "true" +} + 
func isLocalGWModeEnabled() bool { val, present := os.LookupEnv("OVN_GATEWAY_MODE") return present && val == "local" From ca967b82c4f1628d012c287337a7148af17a9f6b Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Mon, 23 Feb 2026 15:27:32 -0500 Subject: [PATCH 48/59] Adds unit test for stale ICMP network policy ACL Configures stale ICMP allow ACLs, then starts up and verifies with the config knob off, that the ACLs are removed. Signed-off-by: Tim Rozet --- go-controller/pkg/ovn/policy_stale_test.go | 36 ++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/go-controller/pkg/ovn/policy_stale_test.go b/go-controller/pkg/ovn/policy_stale_test.go index 91b881833a..5bb5ac44ca 100644 --- a/go-controller/pkg/ovn/policy_stale_test.go +++ b/go-controller/pkg/ovn/policy_stale_test.go @@ -71,6 +71,10 @@ func getStaleARPAllowACLName(ns string) string { // getStaleDefaultDenyData builds stale ACLs and port groups for given netpol func getStaleDefaultDenyData(networkPolicy *knet.NetworkPolicy) []libovsdbtest.TestData { + return getStaleDefaultDenyDataWithICMP(networkPolicy, config.OVNKubernetesFeature.AllowICMPNetworkPolicy) +} + +func getStaleDefaultDenyDataWithICMP(networkPolicy *knet.NetworkPolicy, includeICMP bool) []libovsdbtest.TestData { namespace := networkPolicy.Namespace netpolName := networkPolicy.Name fakeController := getFakeBaseController(&util.DefaultNetInfo{}) @@ -82,7 +86,7 @@ func getStaleDefaultDenyData(networkPolicy *knet.NetworkPolicy) []libovsdbtest.T testData := []libovsdbtest.TestData{egressDenyACL, egressARPAllowACL} egressACLs := []*nbdb.ACL{egressDenyACL, egressARPAllowACL} - if config.OVNKubernetesFeature.AllowICMPNetworkPolicy { + if includeICMP { egressICMPAllowACL := getStaleDefaultDenyACL(netpolName, namespace, "inport == @"+egressPGName+" && "+icmpAllowPolicyMatch, false, true) testData = append(testData, egressICMPAllowACL) egressACLs = append(egressACLs, egressICMPAllowACL) @@ -94,7 +98,7 @@ func 
getStaleDefaultDenyData(networkPolicy *knet.NetworkPolicy) []libovsdbtest.T ingressACLs := []*nbdb.ACL{ingressDenyACL, ingressARPAllowACL} testData = append(testData, ingressDenyACL, ingressARPAllowACL) - if config.OVNKubernetesFeature.AllowICMPNetworkPolicy { + if includeICMP { ingressICMPAllowACL := getStaleDefaultDenyACL(netpolName, namespace, "outport == @"+ingressPGName+" && "+icmpAllowPolicyMatch, false, false) testData = append(testData, ingressICMPAllowACL) ingressACLs = append(ingressACLs, ingressICMPAllowACL) @@ -296,6 +300,34 @@ var _ = ginkgo.Describe("OVN Stale NetworkPolicy Operations", func() { ginkgo.Entry("with allow ICMP network policy enabled", true), ) + ginkgo.It("reconciles with allow ICMP network policy disabled and removes stale ICMP default deny ACLs", func() { + config.OVNKubernetesFeature.AllowICMPNetworkPolicy = false + namespace1 := *newNamespace(namespaceName1) + namespace2 := *newNamespace(namespaceName2) + networkPolicy := getMatchLabelsNetworkPolicy(netPolicyName1, namespace1.Name, + namespace2.Name, "", true, true) + // start with stale ACLs containing ICMP allow ACLs from a previously enabled config + gressPolicyInitialData := getStalePolicyData(networkPolicy, []string{namespace2.Name}) + defaultDenyInitialData := getStaleDefaultDenyDataWithICMP(networkPolicy, true) + initialData := initialDB.NBData + initialData = append(initialData, gressPolicyInitialData...) + initialData = append(initialData, defaultDenyInitialData...) + startOvn(libovsdbtest.TestSetup{NBData: initialData}, []corev1.Namespace{namespace1, namespace2}, + []knet.NetworkPolicy{*networkPolicy}) + + fakeOvn.asf.ExpectEmptyAddressSet(namespaceName1) + fakeOvn.asf.ExpectEmptyAddressSet(namespaceName2) + + _, err := fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). 
+ Get(context.TODO(), networkPolicy.Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // make sure stale ICMP ACLs were removed to match disabled allow-icmp config + expectedData := getNamespaceWithSinglePolicyExpectedData( + newNetpolDataParams(networkPolicy).withPeerNamespaces(namespace2.Name), + initialDB.NBData) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedData...)) + }) + ginkgo.It("reconciles an existing networkPolicy updating stale ACLs with long names", func() { longNamespaceName63 := "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijk" // longest allowed namespace name namespace1 := *newNamespace(longNamespaceName63) From d033767f5bd1fc205eec76248e8b01f52a66f1a0 Mon Sep 17 00:00:00 2001 From: Amin Aflatoonian <8513427+Aminiok@users.noreply.github.com> Date: Tue, 24 Feb 2026 11:59:19 +0100 Subject: [PATCH 49/59] Add Nutanix to adopters page Nutanix (Builds Flow CNI on OVN-Kubernetes, integrated with Nutanix Flow and VPC networking) Signed-off-by: Amin Aflatoonian <8513427+Aminiok@users.noreply.github.com> --- ADOPTERS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/ADOPTERS.md b/ADOPTERS.md index a513042e95..784dde2030 100644 --- a/ADOPTERS.md +++ b/ADOPTERS.md @@ -6,6 +6,7 @@ 2. NVIDIA (Uses OVN-Kubernetes in their production environments) 3. Internet Initiative Japan Inc. (Uses OVN-Kubernetes in their on-premise Kubernetes platform) 4. SAIC Motor Corp. Ltd (Uses OVN-Kubernetes as a networking solution to build a multi-tenant private cloud) +5. 
Nutanix (Builds Flow CNI on OVN-Kubernetes, integrated with Nutanix Flow and VPC networking) ## Projects From 533f66e7932e1bc97645d61389d79c7827a67189 Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Mon, 23 Feb 2026 14:55:38 -0500 Subject: [PATCH 50/59] Fix UDN network controller deadlock due to stopChan nil race In UDNs, goroutines are started for some controllers like NetworkQoS where a waitgroup is used to add a reference to the goroutine, and then stopChan is passed as a mechanism to shut down the NetworkQoS controller. If the UDN controller starts, and then shuts down very quickly, the stopChan is closed and reset to nil. It is set to nil as a pattern we use to guard multiple Stop calls to the UDN controller (Stop may be called multiple times). However, if the NetworkQoS goroutine does not finish starting before the stopChan is closed and reset to nil, then by the time NetworkQos gets to read stopChan, it will hang forever, causing the UDN controller waitgroup to wait forever. This will deadlock the entire network manager from being able to start/stop any more UDN controllers!
We can see this behavior in CI here: I0223 04:37:24.677192 77 network_controller.go:415] [zone-nad-controller network controller]: sync network wpnhc_tenant-blue I0223 04:37:24.677203 77 localnet_user_defined_network_controller.go:311] Stoping controller for UDN wpnhc_tenant-blue I0223 04:37:24.677209 77 base_secondary_layer2_network_controller.go:39] Stop secondary localnet network controller of network wpnhc_tenant-blue I0223 04:37:24.677241 77 obj_retry.go:473] Stop channel got triggered: will stop retrying failed objects of type *v1.Namespace I0223 04:37:24.677250 77 network_qos_controller.go:215] Starting controller wpnhc_tenant-blue-network-controller I0223 04:37:24.677256 77 network_qos_controller.go:218] Waiting for informer caches (networkqos,namespace,pod,node) to sync I0223 04:37:24.677263 77 obj_retry.go:473] Stop channel got triggered: will stop retrying failed objects of type *v1beta2.MultiNetworkPolicy I0223 04:37:24.677270 77 shared_informer.go:349] "Waiting for caches to sync" controller="wpnhc_tenant-blue-network-controller" I0223 04:37:24.677339 77 shared_informer.go:356] "Caches are synced" controller="wpnhc_tenant-blue-network-controller" There is never a "finished syncing network wpnhc_tenant-blue" log again after this for zone-nad-controller, nor any other networks for that matter after this point in the log. However, there are logs for node-nad-controller as it did not hit this race. To fix this, pass a copy of the oc.stopChan to the goroutines. Channels are copied as a reference so closing the oc.stopChan still closes the copy, and we can still allow oc.stopChan to be set to nil as a Stop guard. 
Signed-off-by: Tim Rozet --- .../node/default_node_network_controller.go | 18 +++++++++--------- ...base_secondary_layer2_network_controller.go | 6 +++--- .../layer3_user_defined_network_controller.go | 6 +++--- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/go-controller/pkg/node/default_node_network_controller.go b/go-controller/pkg/node/default_node_network_controller.go index 512aa6fb53..18e0d4c7fe 100644 --- a/go-controller/pkg/node/default_node_network_controller.go +++ b/go-controller/pkg/node/default_node_network_controller.go @@ -969,10 +969,10 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error { return err } nc.wg.Add(1) - go func() { + go func(stopCh <-chan struct{}) { defer nc.wg.Done() - nodeController.Run(nc.stopChan) - }() + nodeController.Run(stopCh) + }(nc.stopChan) } else if config.OvnKubeNode.Mode != types.NodeModeDPUHost { // attempt to cleanup the possibly stale bridge _, stderr, err := util.RunOVSVsctl("--if-exists", "del-br", "br-ext") @@ -1080,7 +1080,7 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error { nc.linkManager.Run(nc.stopChan, nc.wg) nc.wg.Add(1) - go func() { + go func(stopCh <-chan struct{}) { defer nc.wg.Done() podResClient, err := podresourcesapi.New() if err != nil { @@ -1092,8 +1092,8 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error { klog.V(4).Infof("Error closing PodResourcesAPI client: %v", err) } }() - ovspinning.Run(ctx, nc.stopChan, podResClient) - }() + ovspinning.Run(ctx, stopCh, podResClient) + }(nc.stopChan) klog.Infof("Default node network controller initialized and ready.") return nil @@ -1135,10 +1135,10 @@ func (nc *DefaultNodeNetworkController) startEgressIPHealthCheckingServer(mgmtPo } nc.wg.Add(1) - go func() { + go func(stopCh <-chan struct{}) { defer nc.wg.Done() - healthServer.Run(nc.stopChan) - }() + healthServer.Run(stopCh) + }(nc.stopChan) return nil } diff --git 
a/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go b/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go index e28c138247..5a5f7afc79 100644 --- a/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go +++ b/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go @@ -150,11 +150,11 @@ func (oc *BaseLayer2UserDefinedNetworkController) run() error { return fmt.Errorf("unable to create network qos controller, err: %w", err) } oc.wg.Add(1) - go func() { + go func(ch <-chan struct{}) { defer oc.wg.Done() // Until we have scale issues in future let's spawn only one thread - oc.nqosController.Run(1, oc.stopChan) - }() + oc.nqosController.Run(1, ch) + }(oc.stopChan) } // Add ourselves to the route import manager diff --git a/go-controller/pkg/ovn/layer3_user_defined_network_controller.go b/go-controller/pkg/ovn/layer3_user_defined_network_controller.go index b28d1a7ae3..a1654d3244 100644 --- a/go-controller/pkg/ovn/layer3_user_defined_network_controller.go +++ b/go-controller/pkg/ovn/layer3_user_defined_network_controller.go @@ -662,11 +662,11 @@ func (oc *Layer3UserDefinedNetworkController) run() error { return fmt.Errorf("unable to create network qos controller, err: %w", err) } oc.wg.Add(1) - go func() { + go func(ch <-chan struct{}) { defer oc.wg.Done() // Until we have scale issues in future let's spawn only one thread - oc.nqosController.Run(1, oc.stopChan) - }() + oc.nqosController.Run(1, ch) + }(oc.stopChan) } klog.Infof("Completing all the Watchers for network %s took %v", oc.GetNetworkName(), time.Since(start)) From 9d7b70f8df7060b56e28cdd0a5c588ffcf0e34fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9s=20Hern=C3=A1ndez?= Date: Tue, 24 Feb 2026 21:39:46 -0600 Subject: [PATCH 51/59] docs: user-defined-networks: Fix markdown syntax MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes: - #6014 - https://github.com/ovn-kubernetes/ovn-kubernetes/issues/6014 
Signed-off-by: Andrés Hernández --- docs/features/user-defined-networks/user-defined-networks.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/features/user-defined-networks/user-defined-networks.md b/docs/features/user-defined-networks/user-defined-networks.md index f2e7348eb7..d7cb90249d 100644 --- a/docs/features/user-defined-networks/user-defined-networks.md +++ b/docs/features/user-defined-networks/user-defined-networks.md @@ -124,6 +124,7 @@ of end users. Currently supported topology types for a given network include: `Layer3`: is a topology type wherein the pods or VMs are connected to their node’s local router and all these routers are then connected to the distributed switch across nodes. + * Each pod would hence get an IP from the node's subnet segment * When in doubt which topology to use go with layer3 which is the same topology as the cluster default network @@ -142,6 +143,7 @@ network (grey color) which is only used for kubelet healthchecks. `Layer2`: is a topology type wherein the pods or VMs are all connected to the same layer2 flat switch. + * Usually used when the applications deployed expect a layer2 type network connection (Perhaps applications want a single broadcast domain, latency sensitive, use proprietary L2 protocols) * Common in Virtualization world for seamless migration of the VM since @@ -160,6 +162,7 @@ network (grey color) which is only used for kubelet healthchecks. 
`Localnet`: is a topology type wherein the pods or VMs attached to a localnet network on the overlay can egress to the provider’s physical network + * without SNATing to nodeIPs… preserves the podIPs * podIPs can be on the same subnet as the provider’s VLAN * VLAN IDs can be used to mark the traffic coming from the localnet for From 0b82e64f45d5bc0bc87d2f477d8d564c572c45ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9s=20Hern=C3=A1ndez?= Date: Tue, 24 Feb 2026 21:40:40 -0600 Subject: [PATCH 52/59] docs: user-defined-network: Fix 'l2-UDN' image link MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes: - #6014 - https://github.com/ovn-kubernetes/ovn-kubernetes/issues/6014 Signed-off-by: Andrés Hernández --- docs/features/user-defined-networks/user-defined-networks.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/features/user-defined-networks/user-defined-networks.md b/docs/features/user-defined-networks/user-defined-networks.md index d7cb90249d..ac94e519f3 100644 --- a/docs/features/user-defined-networks/user-defined-networks.md +++ b/docs/features/user-defined-networks/user-defined-networks.md @@ -151,7 +151,7 @@ same layer2 flat switch. during live migration * Can be of type `primary` or `secondary` -![l2-UDN](images/L2DeepDive-2segments.png) +![l2-UDN](images/L2DeepDive-2segments.jpg) Here we can see a blue and green P-UDN. On node1, pod1 is part of green UDN and pod2 is part of blue UDN. 
They each have a udn-0 interface that is attached to From 3c67139a2470c640e6f1c1863b82de751790ecbb Mon Sep 17 00:00:00 2001 From: Nadia Pinaeva Date: Wed, 25 Feb 2026 12:53:36 +0100 Subject: [PATCH 53/59] (B)ANP conformance: update framework to use retries Signed-off-by: Nadia Pinaeva --- test/conformance/go.mod | 2 +- test/conformance/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/conformance/go.mod b/test/conformance/go.mod index c8e5e1c2fa..db85159bc0 100644 --- a/test/conformance/go.mod +++ b/test/conformance/go.mod @@ -9,7 +9,7 @@ require ( k8s.io/apimachinery v0.34.1 k8s.io/client-go v0.34.1 sigs.k8s.io/controller-runtime v0.22.1 - sigs.k8s.io/network-policy-api v0.1.8 + sigs.k8s.io/network-policy-api v0.1.9-0.20260225114943-e80807c44a00 ) require ( diff --git a/test/conformance/go.sum b/test/conformance/go.sum index 32cc425e5f..140d9575cf 100644 --- a/test/conformance/go.sum +++ b/test/conformance/go.sum @@ -182,8 +182,8 @@ sigs.k8s.io/controller-runtime v0.22.1 h1:Ah1T7I+0A7ize291nJZdS1CabF/lB4E++WizgV sigs.k8s.io/controller-runtime v0.22.1/go.mod h1:FwiwRjkRPbiN+zp2QRp7wlTCzbUXxZ/D4OzuQUDwBHY= sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= -sigs.k8s.io/network-policy-api v0.1.8 h1:p/VY4aX6LqohGx4sH1X3jdQh6BZ/Gb+8DoQhHKC1fZQ= -sigs.k8s.io/network-policy-api v0.1.8/go.mod h1:QIWX6Th2h0SmCwOwa1+9Urs0W+WDJGL5rujAPUemdkk= +sigs.k8s.io/network-policy-api v0.1.9-0.20260225114943-e80807c44a00 h1:k9sO9mBPtR4hRBiTQbk2hLVdDXBEdM4m5TEjZetT360= +sigs.k8s.io/network-policy-api v0.1.9-0.20260225114943-e80807c44a00/go.mod h1:QIWX6Th2h0SmCwOwa1+9Urs0W+WDJGL5rujAPUemdkk= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= sigs.k8s.io/structured-merge-diff/v6 v6.3.0 
h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco= From 15d73b3d7c942c40dd91bbf57596e153f76d2d9f Mon Sep 17 00:00:00 2001 From: Nadia Pinaeva Date: Wed, 25 Feb 2026 13:02:47 +0100 Subject: [PATCH 54/59] Revert "(B)ANP conformance: update framework to use retries" This reverts commit 3c67139a2470c640e6f1c1863b82de751790ecbb. --- test/conformance/go.mod | 2 +- test/conformance/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/conformance/go.mod b/test/conformance/go.mod index db85159bc0..c8e5e1c2fa 100644 --- a/test/conformance/go.mod +++ b/test/conformance/go.mod @@ -9,7 +9,7 @@ require ( k8s.io/apimachinery v0.34.1 k8s.io/client-go v0.34.1 sigs.k8s.io/controller-runtime v0.22.1 - sigs.k8s.io/network-policy-api v0.1.9-0.20260225114943-e80807c44a00 + sigs.k8s.io/network-policy-api v0.1.8 ) require ( diff --git a/test/conformance/go.sum b/test/conformance/go.sum index 140d9575cf..32cc425e5f 100644 --- a/test/conformance/go.sum +++ b/test/conformance/go.sum @@ -182,8 +182,8 @@ sigs.k8s.io/controller-runtime v0.22.1 h1:Ah1T7I+0A7ize291nJZdS1CabF/lB4E++WizgV sigs.k8s.io/controller-runtime v0.22.1/go.mod h1:FwiwRjkRPbiN+zp2QRp7wlTCzbUXxZ/D4OzuQUDwBHY= sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= -sigs.k8s.io/network-policy-api v0.1.9-0.20260225114943-e80807c44a00 h1:k9sO9mBPtR4hRBiTQbk2hLVdDXBEdM4m5TEjZetT360= -sigs.k8s.io/network-policy-api v0.1.9-0.20260225114943-e80807c44a00/go.mod h1:QIWX6Th2h0SmCwOwa1+9Urs0W+WDJGL5rujAPUemdkk= +sigs.k8s.io/network-policy-api v0.1.8 h1:p/VY4aX6LqohGx4sH1X3jdQh6BZ/Gb+8DoQhHKC1fZQ= +sigs.k8s.io/network-policy-api v0.1.8/go.mod h1:QIWX6Th2h0SmCwOwa1+9Urs0W+WDJGL5rujAPUemdkk= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= 
sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco= From 721400a4ea65bdc8096166ce98c09af26a55ab93 Mon Sep 17 00:00:00 2001 From: Nadia Pinaeva Date: Wed, 25 Feb 2026 12:53:36 +0100 Subject: [PATCH 55/59] (B)ANP conformance: update framework to use retries Signed-off-by: Nadia Pinaeva --- test/conformance/go.mod | 2 +- test/conformance/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/conformance/go.mod b/test/conformance/go.mod index c8e5e1c2fa..db85159bc0 100644 --- a/test/conformance/go.mod +++ b/test/conformance/go.mod @@ -9,7 +9,7 @@ require ( k8s.io/apimachinery v0.34.1 k8s.io/client-go v0.34.1 sigs.k8s.io/controller-runtime v0.22.1 - sigs.k8s.io/network-policy-api v0.1.8 + sigs.k8s.io/network-policy-api v0.1.9-0.20260225114943-e80807c44a00 ) require ( diff --git a/test/conformance/go.sum b/test/conformance/go.sum index 32cc425e5f..140d9575cf 100644 --- a/test/conformance/go.sum +++ b/test/conformance/go.sum @@ -182,8 +182,8 @@ sigs.k8s.io/controller-runtime v0.22.1 h1:Ah1T7I+0A7ize291nJZdS1CabF/lB4E++WizgV sigs.k8s.io/controller-runtime v0.22.1/go.mod h1:FwiwRjkRPbiN+zp2QRp7wlTCzbUXxZ/D4OzuQUDwBHY= sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= -sigs.k8s.io/network-policy-api v0.1.8 h1:p/VY4aX6LqohGx4sH1X3jdQh6BZ/Gb+8DoQhHKC1fZQ= -sigs.k8s.io/network-policy-api v0.1.8/go.mod h1:QIWX6Th2h0SmCwOwa1+9Urs0W+WDJGL5rujAPUemdkk= +sigs.k8s.io/network-policy-api v0.1.9-0.20260225114943-e80807c44a00 h1:k9sO9mBPtR4hRBiTQbk2hLVdDXBEdM4m5TEjZetT360= +sigs.k8s.io/network-policy-api v0.1.9-0.20260225114943-e80807c44a00/go.mod h1:QIWX6Th2h0SmCwOwa1+9Urs0W+WDJGL5rujAPUemdkk= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= 
sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco= From 99107d337fb82c0cd145d3ad999fea904adae649 Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Thu, 4 Dec 2025 10:39:53 -0500 Subject: [PATCH 56/59] Implements DPU Health Check From OKEP #5674 Signed-off-by: Tim Rozet --- dist/templates/rbac-ovnkube-node.yaml.j2 | 33 ++ docs/features/hardware-offload/dpu-support.md | 12 + .../ovn-k8s-cni-overlay.go | 7 +- go-controller/pkg/cni/cni.go | 7 + go-controller/pkg/cni/cniserver.go | 78 ++++- go-controller/pkg/cni/cniserver_test.go | 121 ++++++- go-controller/pkg/cni/cnishim.go | 27 ++ go-controller/pkg/cni/types.go | 9 + go-controller/pkg/config/config.go | 35 +- go-controller/pkg/config/config_test.go | 107 +++++- .../node/default_node_network_controller.go | 40 ++- go-controller/pkg/node/dpulease/manager.go | 308 ++++++++++++++++++ .../pkg/node/dpulease/manager_test.go | 171 ++++++++++ .../templates/rbac-ovnkube-node.yaml | 36 ++ 14 files changed, 953 insertions(+), 38 deletions(-) create mode 100644 go-controller/pkg/node/dpulease/manager.go create mode 100644 go-controller/pkg/node/dpulease/manager_test.go diff --git a/dist/templates/rbac-ovnkube-node.yaml.j2 b/dist/templates/rbac-ovnkube-node.yaml.j2 index 40cebd2294..e74c99ce29 100644 --- a/dist/templates/rbac-ovnkube-node.yaml.j2 +++ b/dist/templates/rbac-ovnkube-node.yaml.j2 @@ -235,3 +235,36 @@ rules: {% if ovn_enable_interconnect == "true" -%} - create {%- endif %} + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: ovnkube-node-dpu-leases + namespace: ovn-kubernetes +roleRef: + name: ovnkube-node-dpu-leases + kind: Role + apiGroup: rbac.authorization.k8s.io +subjects: + {% if ovn_enable_ovnkube_identity == "true" -%} + - kind: Group + name: system:ovn-nodes + apiGroup: rbac.authorization.k8s.io + {% else %} + - kind: ServiceAccount + name: ovnkube-node + namespace: ovn-kubernetes + {%- endif %} + +--- +apiVersion: 
rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: ovnkube-node-dpu-leases + namespace: ovn-kubernetes +rules: + - apiGroups: ["coordination.k8s.io"] + resources: + - leases + verbs: [ "get", "create", "update" ] diff --git a/docs/features/hardware-offload/dpu-support.md b/docs/features/hardware-offload/dpu-support.md index 2c5c23e028..2aac8e6965 100644 --- a/docs/features/hardware-offload/dpu-support.md +++ b/docs/features/hardware-offload/dpu-support.md @@ -55,3 +55,15 @@ For detailed configuration of gateway interfaces in DPU host mode, see [DPU Gate - ovnkube-controller-with-node - ovn-controller - ovs-metrics + +## DPU health monitoring + +OVN-Kubernetes uses a custom Kubernetes `Lease` in the `ovn-kubernetes` namespace to track the health of the DPU side of a trusted deployment. +The DPU host creates the lease and sets an owner reference to the Kubernetes `Node`, while ovnkube running on the DPU renews the lease on a regular interval. + +Two ovnkube-node options control this behavior: +- `--dpu-node-lease-renew-interval` (seconds, default 10). Set to `0` to disable the health check. +- `--dpu-node-lease-duration` (seconds, default 40). + +If the lease expires, the DPU host CNI server fails `ADD` requests immediately with `DPU Not Ready` and the `STATUS` command returns a CNI error with code `50` (The plugin is not available). +This causes the container runtime to report `NetworkReady=false`, preventing new workloads from landing on the affected host until the DPU becomes healthy again. 
diff --git a/go-controller/cmd/ovn-k8s-cni-overlay/ovn-k8s-cni-overlay.go b/go-controller/cmd/ovn-k8s-cni-overlay/ovn-k8s-cni-overlay.go index 88d94faeb5..8a31455f29 100644 --- a/go-controller/cmd/ovn-k8s-cni-overlay/ovn-k8s-cni-overlay.go +++ b/go-controller/cmd/ovn-k8s-cni-overlay/ovn-k8s-cni-overlay.go @@ -22,9 +22,10 @@ func main() { c.Action = func(_ *cli.Context) error { skel.PluginMainFuncs( skel.CNIFuncs{ - Add: p.CmdAdd, - Check: p.CmdCheck, - Del: p.CmdDel, + Add: p.CmdAdd, + Check: p.CmdCheck, + Del: p.CmdDel, + Status: p.CmdStatus, }, version.All, bv.BuildString("ovn-k8s-cni-overlay")) diff --git a/go-controller/pkg/cni/cni.go b/go-controller/pkg/cni/cni.go index 2a7b71b77d..98151304bd 100644 --- a/go-controller/pkg/cni/cni.go +++ b/go-controller/pkg/cni/cni.go @@ -35,6 +35,8 @@ var ( BandwidthNotFound = ¬FoundError{} ) +const dpuNotReadyMsg = "DPU Not Ready" + type direction int func (d direction) String() string { @@ -457,7 +459,12 @@ func HandlePodRequest( response, err = request.cmdDel(clientset) case CNICheck: err = request.cmdCheck() + case CNIUpdate: + // No-op update path today + case CNIStatus: + // handled by DPU health check gating before reaching here default: + err = fmt.Errorf("unsupported CNI command %s", request.Command) } if response != nil { diff --git a/go-controller/pkg/cni/cniserver.go b/go-controller/pkg/cni/cniserver.go index 19378f4483..c2474f0b97 100644 --- a/go-controller/pkg/cni/cniserver.go +++ b/go-controller/pkg/cni/cniserver.go @@ -4,12 +4,14 @@ import ( "context" "encoding/base64" "encoding/json" + "errors" "fmt" "io" "net/http" "strings" "time" + cnitypes "github.com/containernetworking/cni/pkg/types" "github.com/gorilla/mux" nadv1Listers "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/listers/k8s.cni.cncf.io/v1" @@ -27,6 +29,8 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" ) +const kubeletDefaultCRIOperationTimeout = 2 * time.Minute + // *** The Server is PRIVATE 
API between OVN components and may be // changed at any time. It is in no way a supported interface or API. *** // @@ -58,6 +62,7 @@ func NewCNIServer( kclient kubernetes.Interface, networkManager networkmanager.Interface, ovsClient client.Client, + dpuHealth DPUStatusProvider, ) (*Server, error) { var nadLister nadv1Listers.NetworkAttachmentDefinitionLister @@ -88,6 +93,7 @@ func NewCNIServer( handlePodRequestFunc: HandlePodRequest, networkManager: networkManager, ovsClient: ovsClient, + dpuHealth: dpuHealth, } if len(config.Kubernetes.CAData) > 0 { @@ -99,6 +105,15 @@ func NewCNIServer( router.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { result, err := s.handleCNIRequest(r) if err != nil { + var cniErr *cnitypes.Error + if errors.As(err, &cniErr) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusBadRequest) + if encodeErr := json.NewEncoder(w).Encode(cniErr); encodeErr != nil { + klog.Warningf("Failed to write CNI error response: %v", encodeErr) + } + return + } http.Error(w, fmt.Sprintf("%v", err), http.StatusBadRequest) return } @@ -141,7 +156,22 @@ func cniRequestToPodRequest(cr *Request) (*PodRequest, error) { } req := &PodRequest{ - Command: command(cmd), + Command: command(cmd), + timestamp: time.Now(), + } + + conf, err := config.ReadCNIConfig(cr.Config) + if err != nil { + return nil, fmt.Errorf("broken stdin args") + } + req.CNIConf = conf + req.deviceInfo = cr.DeviceInfo + + // STATUS requests do not carry pod-specific context. Return early after validating config. + if req.Command == CNIStatus { + // Match the Kubelet default CRI operation timeout of 2m. 
+ req.ctx, req.cancel = context.WithTimeout(context.Background(), kubeletDefaultCRIOperationTimeout) + return req, nil } req.SandboxID, ok = cr.Env["CNI_CONTAINERID"] @@ -182,11 +212,6 @@ func cniRequestToPodRequest(cr *Request) (*PodRequest, error) { // containerd 1.5: https://github.com/containerd/containerd/pull/5643 req.PodUID = cniArgs["K8S_POD_UID"] - conf, err := config.ReadCNIConfig(cr.Config) - if err != nil { - return nil, fmt.Errorf("broken stdin args") - } - // the first network to the Pod is always named as `default`, // capture the effective NAD Name here req.netName = conf.Name @@ -211,11 +236,8 @@ func cniRequestToPodRequest(cr *Request) (*PodRequest, error) { } } - req.CNIConf = conf - req.deviceInfo = cr.DeviceInfo - req.timestamp = time.Now() - // Match the Kubelet default CRI operation timeout of 2m - req.ctx, req.cancel = context.WithTimeout(context.Background(), 2*time.Minute) + // Match the Kubelet default CRI operation timeout of 2m. + req.ctx, req.cancel = context.WithTimeout(context.Background(), kubeletDefaultCRIOperationTimeout) return req, nil } @@ -233,10 +255,18 @@ func (s *Server) handleCNIRequest(r *http.Request) ([]byte, error) { } defer req.cancel() + if err := s.checkDPUHealth(req); err != nil { + return nil, err + } + result, err := s.handlePodRequestFunc(req, s.clientSet, s.kubeAuth, s.networkManager, s.ovsClient) if err != nil { // Prefix error with request information for easier debugging - return nil, fmt.Errorf("%s %v", req, err) + var cniErr *cnitypes.Error + if !errors.As(err, &cniErr) { + err = fmt.Errorf("%s %w", req, err) + } + return nil, err } return result, nil } @@ -258,3 +288,27 @@ func (s *Server) handleCNIMetrics(w http.ResponseWriter, r *http.Request) { klog.Warningf("Error writing %s HTTP response for metrics post", err) } } + +func (s *Server) checkDPUHealth(req *PodRequest) error { + if s.dpuHealth == nil || config.OvnKubeNode.Mode != types.NodeModeDPUHost { + return nil + } + + if req.Command != CNIAdd && 
req.Command != CNIStatus { + return nil + } + + ready, reason := s.dpuHealth.Ready() + if ready { + return nil + } + + msg := dpuNotReadyMsg + if reason != "" { + msg = fmt.Sprintf("%s: %s", msg, reason) + } + if req.Command == CNIStatus { + return &cnitypes.Error{Code: 50, Msg: msg} + } + return fmt.Errorf("%s", msg) +} diff --git a/go-controller/pkg/cni/cniserver_test.go b/go-controller/pkg/cni/cniserver_test.go index c070616dbc..484f6cfe52 100644 --- a/go-controller/pkg/cni/cniserver_test.go +++ b/go-controller/pkg/cni/cniserver_test.go @@ -28,6 +28,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/networkmanager" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" ) @@ -57,7 +58,7 @@ var expectedResult cnitypes.Result func serverHandleCNI(request *PodRequest, _ *ClientSet, _ *KubeAPIAuth, _ networkmanager.Interface, _ client.Client) ([]byte, error) { if request.Command == CNIAdd { return json.Marshal(&expectedResult) - } else if request.Command == CNIDel || request.Command == CNIUpdate || request.Command == CNICheck { + } else if request.Command == CNIDel || request.Command == CNIUpdate || request.Command == CNICheck || request.Command == CNIStatus { return nil, nil } return nil, fmt.Errorf("unhandled CNI command %v", request.Command) @@ -103,7 +104,7 @@ func TestCNIServer(t *testing.T) { if err != nil { t.Fatalf("failed to call newOVSClientWithExternalIDs: %v", err) } - s, err := NewCNIServer(wf, fakeClient, networkmanager.Default().Interface(), ovsClient) + s, err := NewCNIServer(wf, fakeClient, networkmanager.Default().Interface(), ovsClient, nil) if err != nil { t.Fatalf("error creating CNI server: %v", err) } @@ -218,6 +219,17 @@ func TestCNIServer(t *testing.T) { }, result: nil, }, + // STATUS request + { + name: "STATUS", + request: &Request{ + Env: 
map[string]string{ + "CNI_COMMAND": string(CNIStatus), + }, + Config: []byte(cniConfig), + }, + result: nil, + }, // Missing CNI_ARGS { name: "ARGS1", @@ -287,3 +299,108 @@ func TestCNIServer(t *testing.T) { } } } + +func TestCNIServerStatusNotReady(t *testing.T) { + tmpDir, err := utiltesting.MkTmpdir("cniserver-status") + if err != nil { + t.Fatalf("failed to create temp directory: %v", err) + } + defer os.RemoveAll(tmpDir) + + socketPath := filepath.Join(tmpDir, serverSocketName) + fakeClient := fake.NewSimpleClientset() + err = config.PrepareTestConfig() + if err != nil { + t.Fatalf("failed to prepare test config: %v", err) + } + fakeClientset := &util.OVNNodeClientset{ + KubeClient: fakeClient, + } + wf, err := factory.NewNodeWatchFactory(fakeClientset, nodeName) + if err != nil { + t.Fatalf("failed to create watch factory: %v", err) + } + if err := wf.Start(); err != nil { + t.Fatalf("failed to start watch factory: %v", err) + } + + ovsClient, err := newOVSClientWithExternalIDs(map[string]string{}) + if err != nil { + t.Fatalf("failed to call newOVSClientWithExternalIDs: %v", err) + } + dpuHealth := &fakeDPUHealth{ready: false, reason: "lease expired"} + s, err := NewCNIServer(wf, fakeClient, networkmanager.Default().Interface(), ovsClient, dpuHealth) + if err != nil { + t.Fatalf("error creating CNI server: %v", err) + } + if err := s.Start(tmpDir); err != nil { + t.Fatalf("error starting CNI server: %v", err) + } + + client := &http.Client{ + Transport: &http.Transport{ + Dial: func(_, _ string) (net.Conn, error) { + return net.Dial("unix", socketPath) + }, + }, + } + + testcases := []struct { + name string + mode string + expectCode int + expectErr bool + }{ + { + name: "DPUHostNotReady", + mode: types.NodeModeDPUHost, + expectCode: http.StatusBadRequest, + expectErr: true, + }, + { + name: "FullModeIgnoresHealth", + mode: types.NodeModeFull, + expectCode: http.StatusOK, + expectErr: false, + }, + } + + for _, tc := range testcases { + 
config.OvnKubeNode.Mode = tc.mode + body, code := clientDoCNI(t, client, &Request{ + Env: map[string]string{ + "CNI_COMMAND": string(CNIStatus), + }, + Config: []byte(cniConfig), + }) + if code != tc.expectCode { + t.Fatalf("[%s] expected status %v but got %v", tc.name, tc.expectCode, code) + } + if tc.expectErr { + var cniErr cnitypes.Error + if err := json.Unmarshal(body, &cniErr); err != nil { + t.Fatalf("[%s] failed to unmarshal error response: %v", tc.name, err) + } + if cniErr.Code != 50 { + t.Fatalf("[%s] expected CNI error code 50 but got %d", tc.name, cniErr.Code) + } + if !strings.Contains(cniErr.Msg, dpuNotReadyMsg) { + t.Fatalf("[%s] expected error to mention DPU not ready, got %q", tc.name, cniErr.Msg) + } + if !strings.Contains(cniErr.Msg, "lease expired") { + t.Fatalf("[%s] expected error to include lease reason, got %q", tc.name, cniErr.Msg) + } + } else if len(body) != 0 { + t.Fatalf("[%s] expected empty body for success, got %q", tc.name, string(body)) + } + } +} + +type fakeDPUHealth struct { + ready bool + reason string +} + +func (f *fakeDPUHealth) Ready() (bool, string) { + return f.ready, f.reason +} diff --git a/go-controller/pkg/cni/cnishim.go b/go-controller/pkg/cni/cnishim.go index df6edaad2a..7bfbfdbe0c 100644 --- a/go-controller/pkg/cni/cnishim.go +++ b/go-controller/pkg/cni/cnishim.go @@ -98,6 +98,10 @@ func (p *Plugin) doCNI(url string, req interface{}) ([]byte, error) { } if resp.StatusCode != 200 { + var cniErr types.Error + if err := json.Unmarshal(body, &cniErr); err == nil && cniErr.Code != 0 { + return nil, &cniErr + } return nil, fmt.Errorf("CNI request failed with status %v: '%s'", resp.StatusCode, string(body)) } @@ -339,6 +343,29 @@ func (p *Plugin) CmdDel(args *skel.CmdArgs) error { return err } +// CmdStatus is the callback for plugin readiness checks +func (p *Plugin) CmdStatus(args *skel.CmdArgs) error { + var err error + + startTime := time.Now() + defer func() { + p.postMetrics(startTime, CNIStatus, err) + if err != 
nil { + klog.Errorf("Error on CmdStatus: %v", err) + } + }() + + conf, err := config.ReadCNIConfig(args.StdinData) + if err != nil { + return err + } + setupLogging(conf) + + req := newCNIRequest(args, nadapi.DeviceInfo{}) + _, err = p.doCNIFunc("http://dummy/", req) + return err +} + // CmdCheck is the callback for 'checking' container's networking is as expected. func (p *Plugin) CmdCheck(_ *skel.CmdArgs) error { // noop...CMD check is not considered useful, and has a considerable performance impact diff --git a/go-controller/pkg/cni/types.go b/go-controller/pkg/cni/types.go index 866d5e2749..aaeec4ee5d 100644 --- a/go-controller/pkg/cni/types.go +++ b/go-controller/pkg/cni/types.go @@ -81,6 +81,9 @@ const CNIDel command = "DEL" // CNICheck is the command representing check operation on a pod const CNICheck command = "CHECK" +// CNIStatus is the command representing a plugin readiness check +const CNIStatus command = "STATUS" + // Request sent to the Server by the OVN CNI plugin type Request struct { // CNI environment variables, like CNI_COMMAND and CNI_NETNS @@ -201,6 +204,11 @@ func NewClientSet(kclient kubernetes.Interface, podLister corev1listers.PodListe } } +// DPUStatusProvider reports whether the DPU is ready to service CNI requests. +type DPUStatusProvider interface { + Ready() (bool, string) +} + // Server object that listens for JSON-marshaled Request objects // on a private root-only Unix domain socket. 
type Server struct { @@ -210,4 +218,5 @@ type Server struct { kubeAuth *KubeAPIAuth networkManager networkmanager.Interface ovsClient client.Client + dpuHealth DPUStatusProvider } diff --git a/go-controller/pkg/config/config.go b/go-controller/pkg/config/config.go index 55c985a187..03b937d291 100644 --- a/go-controller/pkg/config/config.go +++ b/go-controller/pkg/config/config.go @@ -236,7 +236,9 @@ var ( // OvnKubeNode holds ovnkube-node parsed config file parameters and command-line overrides OvnKubeNode = OvnKubeNodeConfig{ - Mode: types.NodeModeFull, + Mode: types.NodeModeFull, + DPUNodeLeaseRenewInterval: 10, + DPUNodeLeaseDuration: 40, } ClusterManager = ClusterManagerConfig{ @@ -639,9 +641,11 @@ type HybridOverlayConfig struct { // OvnKubeNodeConfig holds ovnkube-node configurations type OvnKubeNodeConfig struct { - Mode string `gcfg:"mode"` - MgmtPortNetdev string `gcfg:"mgmt-port-netdev"` - MgmtPortDPResourceName string `gcfg:"mgmt-port-dp-resource-name"` + Mode string `gcfg:"mode"` + MgmtPortNetdev string `gcfg:"mgmt-port-netdev"` + MgmtPortDPResourceName string `gcfg:"mgmt-port-dp-resource-name"` + DPUNodeLeaseRenewInterval int `gcfg:"dpu-node-lease-renew-interval"` + DPUNodeLeaseDuration int `gcfg:"dpu-node-lease-duration"` } // ClusterManagerConfig holds configuration for ovnkube-cluster-manager @@ -1832,6 +1836,18 @@ var OvnKubeNodeFlags = []cli.Flag{ Value: OvnKubeNode.MgmtPortDPResourceName, Destination: &cliConfig.OvnKubeNode.MgmtPortDPResourceName, }, + &cli.IntFlag{ + Name: "dpu-node-lease-renew-interval", + Usage: "Interval in seconds at which the DPU updates its custom node lease. 
Set to 0 to disable DPU health checking", + Value: OvnKubeNode.DPUNodeLeaseRenewInterval, + Destination: &cliConfig.OvnKubeNode.DPUNodeLeaseRenewInterval, + }, + &cli.IntFlag{ + Name: "dpu-node-lease-duration", + Usage: "Lease duration in seconds before the DPU is considered unhealthy", + Value: OvnKubeNode.DPUNodeLeaseDuration, + Destination: &cliConfig.OvnKubeNode.DPUNodeLeaseDuration, + }, } // ClusterManagerFlags captures ovnkube-cluster-manager specific configurations @@ -3182,6 +3198,17 @@ func buildOvnKubeNodeConfig(cli, file *config) error { return fmt.Errorf("hybrid overlay is not supported with ovnkube-node mode %s", OvnKubeNode.Mode) } + if OvnKubeNode.DPUNodeLeaseRenewInterval < 0 { + return fmt.Errorf("invalid dpu-node-lease-renew-interval '%d'. must be >= 0", OvnKubeNode.DPUNodeLeaseRenewInterval) + } + if OvnKubeNode.DPUNodeLeaseDuration <= 0 { + return fmt.Errorf("invalid dpu-node-lease-duration '%d'. must be > 0", OvnKubeNode.DPUNodeLeaseDuration) + } + if OvnKubeNode.DPUNodeLeaseDuration <= OvnKubeNode.DPUNodeLeaseRenewInterval { + return fmt.Errorf("invalid dpu-node-lease-duration '%d'. must be > dpu-node-lease-renew-interval '%d'", + OvnKubeNode.DPUNodeLeaseDuration, OvnKubeNode.DPUNodeLeaseRenewInterval) + } + // Warn the user if both MgmtPortNetdev and MgmtPortDPResourceName are specified since they // configure the management port. 
if OvnKubeNode.MgmtPortNetdev != "" && OvnKubeNode.MgmtPortDPResourceName != "" { diff --git a/go-controller/pkg/config/config_test.go b/go-controller/pkg/config/config_test.go index 2a108e39d6..ae8258581c 100644 --- a/go-controller/pkg/config/config_test.go +++ b/go-controller/pkg/config/config_test.go @@ -1974,12 +1974,16 @@ udn-allowed-default-services= ns/svc, ns1/svc1 It("Overrides value from Config file", func() { cliConfig := config{ OvnKubeNode: OvnKubeNodeConfig{ - Mode: types.NodeModeFull, + Mode: types.NodeModeFull, + DPUNodeLeaseDuration: OvnKubeNode.DPUNodeLeaseDuration, + DPUNodeLeaseRenewInterval: OvnKubeNode.DPUNodeLeaseRenewInterval, }, } file := config{ OvnKubeNode: OvnKubeNodeConfig{ - Mode: types.NodeModeDPU, + Mode: types.NodeModeDPU, + DPUNodeLeaseDuration: OvnKubeNode.DPUNodeLeaseDuration, + DPUNodeLeaseRenewInterval: OvnKubeNode.DPUNodeLeaseRenewInterval, }, } err := buildOvnKubeNodeConfig(&cliConfig, &file) @@ -1990,9 +1994,11 @@ udn-allowed-default-services= ns/svc, ns1/svc1 It("Overrides value from CLI", func() { cliConfig := config{ OvnKubeNode: OvnKubeNodeConfig{ - Mode: types.NodeModeDPUHost, - MgmtPortNetdev: "enp1s0f0v0", - MgmtPortDPResourceName: "openshift.io/mgmtvf", + Mode: types.NodeModeDPUHost, + MgmtPortNetdev: "enp1s0f0v0", + MgmtPortDPResourceName: "openshift.io/mgmtvf", + DPUNodeLeaseRenewInterval: 5, + DPUNodeLeaseDuration: 20, }, } err := buildOvnKubeNodeConfig(&cliConfig, &config{}) @@ -2000,6 +2006,8 @@ udn-allowed-default-services= ns/svc, ns1/svc1 gomega.Expect(OvnKubeNode.Mode).To(gomega.Equal(types.NodeModeDPUHost)) gomega.Expect(OvnKubeNode.MgmtPortNetdev).To(gomega.Equal("enp1s0f0v0")) gomega.Expect(OvnKubeNode.MgmtPortDPResourceName).To(gomega.Equal("openshift.io/mgmtvf")) + gomega.Expect(OvnKubeNode.DPUNodeLeaseRenewInterval).To(gomega.Equal(5)) + gomega.Expect(OvnKubeNode.DPUNodeLeaseDuration).To(gomega.Equal(20)) }) It("Fails with unsupported mode", func() { @@ -2026,14 +2034,71 @@ 
udn-allowed-default-services= ns/svc, ns1/svc1 "hybrid overlay is not supported with ovnkube-node mode")) }) + It("Fails if DPU node lease renew interval is negative", func() { + cliConfig := config{ + OvnKubeNode: OvnKubeNodeConfig{ + Mode: types.NodeModeFull, + DPUNodeLeaseRenewInterval: -1, + DPUNodeLeaseDuration: OvnKubeNode.DPUNodeLeaseDuration, + }, + } + err := buildOvnKubeNodeConfig(&cliConfig, &config{OvnKubeNode: OvnKubeNode}) + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("dpu-node-lease-renew-interval")) + }) + + It("Succeeds if DPU node lease renew interval is zero", func() { + cliConfig := config{ + OvnKubeNode: OvnKubeNodeConfig{ + Mode: types.NodeModeFull, + DPUNodeLeaseRenewInterval: 0, + DPUNodeLeaseDuration: 10, + }, + } + err := buildOvnKubeNodeConfig(&cliConfig, &config{OvnKubeNode: OvnKubeNode}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Expect(OvnKubeNode.DPUNodeLeaseRenewInterval).To(gomega.Equal(0)) + gomega.Expect(OvnKubeNode.DPUNodeLeaseDuration).To(gomega.Equal(10)) + }) + + It("Fails if DPU node lease duration is non-positive", func() { + cliConfig := config{ + OvnKubeNode: OvnKubeNodeConfig{ + Mode: types.NodeModeFull, + DPUNodeLeaseDuration: 0, + }, + } + err := buildOvnKubeNodeConfig(&cliConfig, &config{OvnKubeNode: OvnKubeNode}) + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("dpu-node-lease-duration")) + }) + + It("Fails if DPU node lease duration is less than or equal to renew interval", func() { + cliConfig := config{ + OvnKubeNode: OvnKubeNodeConfig{ + Mode: types.NodeModeFull, + DPUNodeLeaseRenewInterval: 10, + DPUNodeLeaseDuration: 10, + }, + } + err := buildOvnKubeNodeConfig(&cliConfig, &config{OvnKubeNode: OvnKubeNode}) + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.Or( + gomega.ContainSubstring("dpu-node-lease-duration"), + 
gomega.ContainSubstring("dpu-node-lease-renew-interval"), + )) + }) + It("Fails if management port is provided and ovnkube node mode is dpu", func() { cliConfig := config{ OvnKubeNode: OvnKubeNodeConfig{ - Mode: types.NodeModeDPU, - MgmtPortNetdev: "enp1s0f0v0", + Mode: types.NodeModeDPU, + MgmtPortNetdev: "enp1s0f0v0", + DPUNodeLeaseDuration: OvnKubeNode.DPUNodeLeaseDuration, + DPUNodeLeaseRenewInterval: OvnKubeNode.DPUNodeLeaseRenewInterval, }, } - err := buildOvnKubeNodeConfig(&cliConfig, &config{}) + err := buildOvnKubeNodeConfig(&cliConfig, &config{OvnKubeNode: OvnKubeNode}) gomega.Expect(err).To(gomega.HaveOccurred()) gomega.Expect(err.Error()).To(gomega.ContainSubstring("ovnkube-node-mgmt-port-netdev or ovnkube-node-mgmt-port-dp-resource-name must not be provided")) }) @@ -2041,10 +2106,12 @@ udn-allowed-default-services= ns/svc, ns1/svc1 It("Fails if management port is not provided and ovnkube node mode is dpu-host", func() { cliConfig := config{ OvnKubeNode: OvnKubeNodeConfig{ - Mode: types.NodeModeDPUHost, + Mode: types.NodeModeDPUHost, + DPUNodeLeaseDuration: OvnKubeNode.DPUNodeLeaseDuration, + DPUNodeLeaseRenewInterval: OvnKubeNode.DPUNodeLeaseRenewInterval, }, } - err := buildOvnKubeNodeConfig(&cliConfig, &config{}) + err := buildOvnKubeNodeConfig(&cliConfig, &config{OvnKubeNode: OvnKubeNode}) gomega.Expect(err).To(gomega.HaveOccurred()) gomega.Expect(err.Error()).To(gomega.ContainSubstring("ovnkube-node-mgmt-port-netdev or ovnkube-node-mgmt-port-dp-resource-name must be provided")) }) @@ -2052,13 +2119,17 @@ udn-allowed-default-services= ns/svc, ns1/svc1 It("Succeeds if management netdev provided in the full mode", func() { cliConfig := config{ OvnKubeNode: OvnKubeNodeConfig{ - Mode: types.NodeModeFull, - MgmtPortNetdev: "ens1f0v0", + Mode: types.NodeModeFull, + MgmtPortNetdev: "ens1f0v0", + DPUNodeLeaseDuration: OvnKubeNode.DPUNodeLeaseDuration, + DPUNodeLeaseRenewInterval: OvnKubeNode.DPUNodeLeaseRenewInterval, }, } file := config{ OvnKubeNode: 
OvnKubeNodeConfig{ - Mode: types.NodeModeFull, + Mode: types.NodeModeFull, + DPUNodeLeaseDuration: OvnKubeNode.DPUNodeLeaseDuration, + DPUNodeLeaseRenewInterval: OvnKubeNode.DPUNodeLeaseRenewInterval, }, } err := buildOvnKubeNodeConfig(&cliConfig, &file) @@ -2068,13 +2139,17 @@ udn-allowed-default-services= ns/svc, ns1/svc1 It("Succeeds if management port device plugin resource name provided in the full mode", func() { cliConfig := config{ OvnKubeNode: OvnKubeNodeConfig{ - Mode: types.NodeModeFull, - MgmtPortDPResourceName: "openshift.io/mgmtvf", + Mode: types.NodeModeFull, + MgmtPortDPResourceName: "openshift.io/mgmtvf", + DPUNodeLeaseDuration: OvnKubeNode.DPUNodeLeaseDuration, + DPUNodeLeaseRenewInterval: OvnKubeNode.DPUNodeLeaseRenewInterval, }, } file := config{ OvnKubeNode: OvnKubeNodeConfig{ - Mode: types.NodeModeFull, + Mode: types.NodeModeFull, + DPUNodeLeaseDuration: OvnKubeNode.DPUNodeLeaseDuration, + DPUNodeLeaseRenewInterval: OvnKubeNode.DPUNodeLeaseRenewInterval, }, } err := buildOvnKubeNodeConfig(&cliConfig, &file) diff --git a/go-controller/pkg/node/default_node_network_controller.go b/go-controller/pkg/node/default_node_network_controller.go index 512aa6fb53..6e3bc01587 100644 --- a/go-controller/pkg/node/default_node_network_controller.go +++ b/go-controller/pkg/node/default_node_network_controller.go @@ -38,6 +38,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/networkmanager" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/controllers/egressip" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/controllers/egressservice" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/dpulease" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/linkmanager" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/managementport" nodenft "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/nftables" @@ -126,6 +127,8 @@ type DefaultNodeNetworkController struct { // retry framework for nodes, used for 
updating routes/nftables rules for node PMTUD guarding retryNodes *retry.RetryFramework + dpuNodeLeaseManager *dpulease.Manager + apbExternalRouteNodeController *apbroute.ExternalGatewayNodeController cniServer *cni.Server @@ -740,6 +743,22 @@ func (nc *DefaultNodeNetworkController) Init(ctx context.Context) error { return fmt.Errorf("failed to parse kubernetes node IP address. %v", nodeAddrStr) } + if (config.OvnKubeNode.Mode == types.NodeModeDPUHost || config.OvnKubeNode.Mode == types.NodeModeDPU) && + config.OvnKubeNode.DPUNodeLeaseRenewInterval > 0 { + nc.dpuNodeLeaseManager = dpulease.NewManager( + nc.client, + config.Kubernetes.OVNConfigNamespace, + node, + time.Duration(config.OvnKubeNode.DPUNodeLeaseRenewInterval)*time.Second, + time.Duration(config.OvnKubeNode.DPUNodeLeaseDuration)*time.Second, + ) + if config.OvnKubeNode.Mode == types.NodeModeDPUHost { + if _, err := nc.dpuNodeLeaseManager.EnsureLease(ctx); err != nil { + return err + } + } + } + // Make sure that the node zone matches with the Southbound db zone. 
// Wait for 300s before giving up var sbZone string @@ -814,7 +833,7 @@ func (nc *DefaultNodeNetworkController) Init(ctx context.Context) error { if !ok { return fmt.Errorf("cannot get kubeclient for starting CNI server") } - cniServer, err = cni.NewCNIServer(nc.watchFactory, kclient.KClient, nc.networkManager, nc.ovsClient) + cniServer, err = cni.NewCNIServer(nc.watchFactory, kclient.KClient, nc.networkManager, nc.ovsClient, nc.dpuNodeLeaseManager) if err != nil { return err } @@ -1027,6 +1046,25 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error { nc.healthzServer.Start(nc.stopChan, nc.wg) } + if nc.dpuNodeLeaseManager != nil { + if config.OvnKubeNode.Mode == types.NodeModeDPU { + nc.wg.Add(1) + go func() { + defer nc.wg.Done() + nc.dpuNodeLeaseManager.RunUpdater(ctx) + }() + } else if config.OvnKubeNode.Mode == types.NodeModeDPUHost { + if err := nc.dpuNodeLeaseManager.CheckStatus(ctx); err != nil { + klog.Warningf("Initial DPU node lease check failed: %v", err) + } + nc.wg.Add(1) + go func() { + defer nc.wg.Done() + nc.dpuNodeLeaseManager.RunMonitor(ctx) + }() + } + } + if config.OvnKubeNode.Mode == types.NodeModeDPU { if _, err := nc.watchPodsDPU(); err != nil { return err diff --git a/go-controller/pkg/node/dpulease/manager.go b/go-controller/pkg/node/dpulease/manager.go new file mode 100644 index 0000000000..9ae1758088 --- /dev/null +++ b/go-controller/pkg/node/dpulease/manager.go @@ -0,0 +1,308 @@ +package dpulease + +import ( + "context" + "fmt" + "sync" + "time" + + coordinationv1 "k8s.io/api/coordination/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/util/retry" + "k8s.io/klog/v2" +) + +const ( + // HolderIdentity is used on the DPU node lease object + HolderIdentity = 
"ovnkube-dpu-node" + leaseNamePrefix = "ovn-dpu-" +) + +// Manager handles lifecycle and readiness tracking for the DPU node lease. +type Manager struct { + client kubernetes.Interface + namespace string + nodeName string + nodeUID types.UID + renewInterval time.Duration + leaseDuration time.Duration + + statusMu sync.RWMutex + ready bool + reason string +} + +// NewManager builds a new Manager. +func NewManager(client kubernetes.Interface, namespace string, node *corev1.Node, renewInterval, leaseDuration time.Duration) *Manager { + m := &Manager{ + client: client, + namespace: namespace, + nodeName: node.Name, + nodeUID: node.UID, + renewInterval: renewInterval, + leaseDuration: leaseDuration, + } + + m.setStatus("", true) + + return m +} + +// Ready reports the current readiness and message for consumers such as the CNI server. +func (m *Manager) Ready() (bool, string) { + m.statusMu.RLock() + defer m.statusMu.RUnlock() + return m.ready, m.reason +} + +// EnsureLease creates or updates the DPU lease. +func (m *Manager) EnsureLease(ctx context.Context) (*coordinationv1.Lease, error) { + if m.renewInterval == 0 { + return nil, nil + } + + var lease *coordinationv1.Lease + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + existing, err := m.client.CoordinationV1().Leases(m.namespace).Get(ctx, m.leaseName(), metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + now := metav1.NowMicro() + lease = m.newLease(now) + created, createErr := m.client.CoordinationV1().Leases(m.namespace).Create(ctx, lease, metav1.CreateOptions{}) + if createErr != nil { + if apierrors.IsAlreadyExists(createErr) { + // Treat concurrent lease creation as a retriable conflict so we retry into the Get/Update path. 
+ return apierrors.NewConflict(schema.GroupResource{Group: coordinationv1.GroupName, Resource: "leases"}, m.leaseName(), createErr) + } + return createErr + } + lease = created + return nil + } + if err != nil { + return err + } + lease = existing.DeepCopy() + if !m.updateLeaseSpec(lease, metav1.NowMicro(), true) { + return nil + } + updated, updateErr := m.client.CoordinationV1().Leases(m.namespace).Update(ctx, lease, metav1.UpdateOptions{}) + if updateErr != nil { + return updateErr + } + lease = updated + return nil + }) + if err != nil { + m.setStatus(fmt.Sprintf("failed ensuring DPU lease: %v", err), false) + return nil, err + } + + m.setStatus("", true) + return lease, nil +} + +// RunUpdater periodically renews the lease heartbeat. Intended for DPU nodes. +func (m *Manager) RunUpdater(ctx context.Context) { + if m.renewInterval == 0 { + return + } + + wait.UntilWithContext(ctx, func(ctx context.Context) { + if err := m.Renew(ctx); err != nil { + klog.Warningf("Failed to renew DPU lease %s: %v", m.leaseName(), err) + } + }, m.renewInterval) +} + +// RunMonitor periodically checks the lease for expiry. Intended for DPU host nodes. +func (m *Manager) RunMonitor(ctx context.Context) { + if m.renewInterval == 0 { + return + } + + period := m.monitorPeriod() + wait.UntilWithContext(ctx, func(ctx context.Context) { + if err := m.CheckStatus(ctx); err != nil { + klog.Warningf("DPU lease %s marked unhealthy: %v", m.leaseName(), err) + } + }, period) +} + +// CheckStatus validates the lease and updates readiness. 
+func (m *Manager) CheckStatus(ctx context.Context) error { + if m.renewInterval == 0 { + m.setStatus("", true) + return nil + } + + lease, err := m.client.CoordinationV1().Leases(m.namespace).Get(ctx, m.leaseName(), metav1.GetOptions{}) + if err != nil { + if apierrors.IsNotFound(err) { + m.setStatus("DPU node lease not found", false) + } else { + m.setStatus(fmt.Sprintf("failed to read DPU node lease: %v", err), false) + } + return err + } + + expired, msg := m.isExpired(lease) + if expired { + m.setStatus(msg, false) + return fmt.Errorf("%s", msg) + } + + m.setStatus("", true) + return nil +} + +// Renew bumps the lease renew time, creating the lease if needed. +func (m *Manager) Renew(ctx context.Context) error { + if m.renewInterval == 0 { + return nil + } + + return retry.RetryOnConflict(retry.DefaultRetry, func() error { + lease, err := m.client.CoordinationV1().Leases(m.namespace).Get(ctx, m.leaseName(), metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + _, err = m.EnsureLease(ctx) + return err + } + if err != nil { + return err + } + if !m.updateLeaseSpec(lease, metav1.NowMicro(), true) { + return nil + } + _, err = m.client.CoordinationV1().Leases(m.namespace).Update(ctx, lease, metav1.UpdateOptions{}) + return err + }) +} + +func (m *Manager) monitorPeriod() time.Duration { + period := m.renewInterval + durationFraction := m.leaseDuration / 4 + if durationFraction > 0 && durationFraction < period { + period = durationFraction + } + if period <= 0 { + return time.Second + } + return period +} + +func (m *Manager) setStatus(reason string, ready bool) { + m.statusMu.Lock() + defer m.statusMu.Unlock() + + if m.ready != ready || m.reason != reason { + m.ready = ready + m.reason = reason + } +} + +func (m *Manager) leaseName() string { + return leaseNamePrefix + m.nodeName +} + +func (m *Manager) newLease(now metav1.MicroTime) *coordinationv1.Lease { + return &coordinationv1.Lease{ + ObjectMeta: metav1.ObjectMeta{ + Name: m.leaseName(), + Namespace: 
m.namespace, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "v1", + Kind: "Node", + Name: m.nodeName, + UID: m.nodeUID, + Controller: boolPtr(true), + BlockOwnerDeletion: boolPtr(true), + }, + }, + }, + Spec: coordinationv1.LeaseSpec{ + HolderIdentity: stringPtr(HolderIdentity), + LeaseDurationSeconds: int32Ptr(int32(m.leaseDuration.Seconds())), + AcquireTime: &now, + RenewTime: &now, + }, + } +} + +func (m *Manager) updateLeaseSpec(lease *coordinationv1.Lease, now metav1.MicroTime, bumpRenew bool) bool { + changed := false + + if lease.Spec.HolderIdentity == nil || *lease.Spec.HolderIdentity != HolderIdentity { + lease.Spec.HolderIdentity = stringPtr(HolderIdentity) + changed = true + } + + if lease.Spec.LeaseDurationSeconds == nil || int32(m.leaseDuration.Seconds()) != *lease.Spec.LeaseDurationSeconds { + lease.Spec.LeaseDurationSeconds = int32Ptr(int32(m.leaseDuration.Seconds())) + changed = true + } + + if bumpRenew { + if lease.Spec.RenewTime == nil || !lease.Spec.RenewTime.Equal(&now) { + lease.Spec.RenewTime = &now + changed = true + } + if lease.Spec.AcquireTime == nil { + lease.Spec.AcquireTime = &now + changed = true + } + } + + if !m.hasOwnerRef(lease.OwnerReferences) { + lease.OwnerReferences = append(lease.OwnerReferences, metav1.OwnerReference{ + APIVersion: "v1", + Kind: "Node", + Name: m.nodeName, + UID: m.nodeUID, + Controller: boolPtr(true), + BlockOwnerDeletion: boolPtr(true), + }) + changed = true + } + + return changed +} + +func (m *Manager) hasOwnerRef(refs []metav1.OwnerReference) bool { + for _, ref := range refs { + if ref.Kind == "Node" && ref.Name == m.nodeName && ref.UID == m.nodeUID { + return true + } + } + return false +} + +func (m *Manager) isExpired(lease *coordinationv1.Lease) (bool, string) { + if lease.Spec.LeaseDurationSeconds == nil || lease.Spec.RenewTime == nil { + return true, "DPU node lease missing renew time or duration" + } + + expire := 
lease.Spec.RenewTime.Time.Add(time.Duration(*lease.Spec.LeaseDurationSeconds) * time.Second) + if time.Now().After(expire) { + return true, fmt.Sprintf("DPU node lease expired at %s", expire.UTC().Format(time.RFC3339)) + } + return false, "" +} + +func stringPtr(val string) *string { + return &val +} + +func int32Ptr(val int32) *int32 { + return &val +} + +func boolPtr(val bool) *bool { + return &val +} diff --git a/go-controller/pkg/node/dpulease/manager_test.go b/go-controller/pkg/node/dpulease/manager_test.go new file mode 100644 index 0000000000..c669c3067c --- /dev/null +++ b/go-controller/pkg/node/dpulease/manager_test.go @@ -0,0 +1,171 @@ +package dpulease + +import ( + "context" + "testing" + "time" + + "github.com/onsi/gomega" + + coordinationv1 "k8s.io/api/coordination/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes/fake" + k8stesting "k8s.io/client-go/testing" +) + +func TestEnsureLeaseCreatesObject(t *testing.T) { + g := gomega.NewWithT(t) + client := fake.NewSimpleClientset() + node := &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "worker", UID: types.UID("nodeuid")}} + mgr := NewManager(client, "ovn-kubernetes", node, 10*time.Second, 40*time.Second) + + lease, err := mgr.EnsureLease(context.Background()) + g.Expect(err).NotTo(gomega.HaveOccurred()) + g.Expect(lease).NotTo(gomega.BeNil()) + + fetched, err := client.CoordinationV1().Leases("ovn-kubernetes").Get(context.Background(), lease.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(gomega.HaveOccurred()) + g.Expect(fetched.Spec.HolderIdentity).NotTo(gomega.BeNil()) + g.Expect(*fetched.Spec.HolderIdentity).To(gomega.Equal(HolderIdentity)) + g.Expect(fetched.Spec.LeaseDurationSeconds).NotTo(gomega.BeNil()) + 
g.Expect(*fetched.Spec.LeaseDurationSeconds).To(gomega.Equal(int32(40))) + g.Expect(fetched.Spec.RenewTime).NotTo(gomega.BeNil()) + g.Expect(fetched.OwnerReferences).NotTo(gomega.BeEmpty()) + g.Expect(fetched.OwnerReferences[0].UID).To(gomega.Equal(node.UID)) + + ready, reason := mgr.Ready() + g.Expect(ready).To(gomega.BeTrue()) + g.Expect(reason).To(gomega.BeEmpty()) +} + +func TestRenewUpdatesTimestamp(t *testing.T) { + g := gomega.NewWithT(t) + client := fake.NewSimpleClientset() + node := &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "worker", UID: types.UID("nodeuid")}} + mgr := NewManager(client, "ovn-kubernetes", node, time.Second, 20*time.Second) + + lease, err := mgr.EnsureLease(context.Background()) + g.Expect(err).NotTo(gomega.HaveOccurred()) + g.Expect(lease.Spec.RenewTime).NotTo(gomega.BeNil()) + originalRenew := lease.Spec.RenewTime.DeepCopy() + + time.Sleep(10 * time.Millisecond) + g.Expect(mgr.Renew(context.Background())).To(gomega.Succeed()) + + updated, err := client.CoordinationV1().Leases("ovn-kubernetes").Get(context.Background(), lease.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(gomega.HaveOccurred()) + g.Expect(updated.Spec.RenewTime.Time.After(originalRenew.Time)).To(gomega.BeTrue()) +} + +func TestCheckStatusDetectsExpiry(t *testing.T) { + g := gomega.NewWithT(t) + oldTime := metav1.NewMicroTime(time.Now().Add(-2 * time.Minute)) + lease := &coordinationv1.Lease{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ovn-dpu-worker", + Namespace: "ovn-kubernetes", + }, + Spec: coordinationv1.LeaseSpec{ + HolderIdentity: ptrToString(HolderIdentity), + LeaseDurationSeconds: ptrToInt32(10), + RenewTime: &oldTime, + }, + } + client := fake.NewSimpleClientset(lease) + node := &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "worker", UID: types.UID("nodeuid")}} + mgr := NewManager(client, "ovn-kubernetes", node, time.Second, 10*time.Second) + + err := mgr.CheckStatus(context.Background()) + g.Expect(err).To(gomega.HaveOccurred()) + ready, reason := 
mgr.Ready() + g.Expect(ready).To(gomega.BeFalse()) + g.Expect(reason).To(gomega.ContainSubstring("expired")) +} + +func TestCheckStatusHealthy(t *testing.T) { + g := gomega.NewWithT(t) + now := metav1.NowMicro() + lease := &coordinationv1.Lease{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ovn-dpu-worker", + Namespace: "ovn-kubernetes", + }, + Spec: coordinationv1.LeaseSpec{ + HolderIdentity: ptrToString(HolderIdentity), + LeaseDurationSeconds: ptrToInt32(30), + RenewTime: &now, + }, + } + client := fake.NewSimpleClientset(lease) + node := &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "worker", UID: types.UID("nodeuid")}} + mgr := NewManager(client, "ovn-kubernetes", node, time.Second, 30*time.Second) + + g.Expect(mgr.CheckStatus(context.Background())).To(gomega.Succeed()) + ready, reason := mgr.Ready() + g.Expect(ready).To(gomega.BeTrue()) + g.Expect(reason).To(gomega.BeEmpty()) +} + +func TestEnsureLeaseRetriesOnAlreadyExists(t *testing.T) { + g := gomega.NewWithT(t) + client := fake.NewSimpleClientset() + node := &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "worker", UID: types.UID("nodeuid")}} + mgr := NewManager(client, "ovn-kubernetes", node, time.Second, 20*time.Second) + + getCalls := 0 + client.Fake.PrependReactor("get", "leases", func(_ k8stesting.Action) (bool, runtime.Object, error) { + getCalls++ + if getCalls == 1 { + return true, nil, apierrors.NewNotFound(schema.GroupResource{Group: coordinationv1.GroupName, Resource: "leases"}, "ovn-dpu-worker") + } + return false, nil, nil + }) + + createCalls := 0 + client.Fake.PrependReactor("create", "leases", func(_ k8stesting.Action) (bool, runtime.Object, error) { + createCalls++ + if createCalls == 1 { + now := metav1.NowMicro() + existing := &coordinationv1.Lease{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ovn-dpu-worker", + Namespace: "ovn-kubernetes", + }, + Spec: coordinationv1.LeaseSpec{ + HolderIdentity: ptrToString("someone-else"), + LeaseDurationSeconds: ptrToInt32(1), + RenewTime: &now, + }, + } 
+ g.Expect(client.Tracker().Add(existing)).To(gomega.Succeed()) + return true, nil, apierrors.NewAlreadyExists(schema.GroupResource{Group: coordinationv1.GroupName, Resource: "leases"}, "ovn-dpu-worker") + } + return false, nil, nil + }) + + lease, err := mgr.EnsureLease(context.Background()) + g.Expect(err).NotTo(gomega.HaveOccurred()) + g.Expect(lease).NotTo(gomega.BeNil()) + g.Expect(createCalls).To(gomega.Equal(1)) + g.Expect(getCalls).To(gomega.BeNumerically(">=", 2)) + + fetched, err := client.CoordinationV1().Leases("ovn-kubernetes").Get(context.Background(), "ovn-dpu-worker", metav1.GetOptions{}) + g.Expect(err).NotTo(gomega.HaveOccurred()) + g.Expect(fetched.Spec.HolderIdentity).NotTo(gomega.BeNil()) + g.Expect(*fetched.Spec.HolderIdentity).To(gomega.Equal(HolderIdentity)) + g.Expect(fetched.Spec.LeaseDurationSeconds).NotTo(gomega.BeNil()) + g.Expect(*fetched.Spec.LeaseDurationSeconds).To(gomega.Equal(int32(20))) +} + +func ptrToString(val string) *string { + return &val +} + +func ptrToInt32(val int32) *int32 { + return &val +} diff --git a/helm/ovn-kubernetes/templates/rbac-ovnkube-node.yaml b/helm/ovn-kubernetes/templates/rbac-ovnkube-node.yaml index a2bec63d7e..efadaa7219 100644 --- a/helm/ovn-kubernetes/templates/rbac-ovnkube-node.yaml +++ b/helm/ovn-kubernetes/templates/rbac-ovnkube-node.yaml @@ -235,3 +235,39 @@ rules: {{- if eq (hasKey .Values.global "enableInterconnect" | ternary .Values.global.enableInterconnect false) true }} - create {{- end }} + +{{- $tags := (.Values.tags | default dict) }} +{{- if (index $tags "ovnkube-node-dpu-host") }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: ovnkube-node-dpu-leases + namespace: ovn-kubernetes +roleRef: + name: ovnkube-node-dpu-leases + kind: Role + apiGroup: rbac.authorization.k8s.io +subjects: + {{- if eq (hasKey .Values.global "enableOvnKubeIdentity" | ternary .Values.global.enableOvnKubeIdentity true) true }} + - kind: Group + name: system:ovn-nodes + 
apiGroup: rbac.authorization.k8s.io + {{- else }} + - kind: ServiceAccount + name: ovnkube-node + namespace: ovn-kubernetes + {{- end }} + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: ovnkube-node-dpu-leases + namespace: ovn-kubernetes +rules: + - apiGroups: ["coordination.k8s.io"] + resources: + - leases + verbs: [ "get", "create", "update" ] +{{- end }} From 845456d0d0612d0a88cadae52e47e62db6b3cf1c Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Fri, 13 Feb 2026 18:05:43 -0500 Subject: [PATCH 57/59] Moves OVN-K to CNI 1.1.0 GC CMD is just a noop handler, like CHECK. Signed-off-by: Tim Rozet --- .../ovn-k8s-cni-overlay.go | 1 + .../pkg/clustermanager/clustermanager_test.go | 2 +- .../controller_components_test.go | 4 +-- .../networkconnect/controller_test.go | 6 ++-- .../routeadvertisements/controller_test.go | 2 +- .../userdefinednetwork/controller_test.go | 8 +++--- .../userdefinednetwork/nad_test.go | 12 ++++---- .../template/net-attach-def-template.go | 6 ++-- .../template/net-attach-def-template_test.go | 28 +++++++++---------- go-controller/pkg/cni/cniserver_test.go | 2 +- go-controller/pkg/cni/cnishim.go | 22 +++++++++++++++ go-controller/pkg/cni/cnishim_test.go | 22 +++++++++++---- go-controller/pkg/cni/helper_linux_test.go | 4 +-- go-controller/pkg/cni/types.go | 3 ++ go-controller/pkg/config/cni.go | 5 +++- .../controller_components_test.go | 2 +- go-controller/pkg/testing/util.go | 4 +-- go-controller/pkg/util/multi_network_test.go | 4 +-- go-controller/pkg/util/nad.go | 2 +- test/e2e/network_segmentation.go | 5 ++-- 20 files changed, 91 insertions(+), 53 deletions(-) diff --git a/go-controller/cmd/ovn-k8s-cni-overlay/ovn-k8s-cni-overlay.go b/go-controller/cmd/ovn-k8s-cni-overlay/ovn-k8s-cni-overlay.go index 8a31455f29..c3cb365244 100644 --- a/go-controller/cmd/ovn-k8s-cni-overlay/ovn-k8s-cni-overlay.go +++ b/go-controller/cmd/ovn-k8s-cni-overlay/ovn-k8s-cni-overlay.go @@ -25,6 +25,7 @@ func main() { Add: p.CmdAdd, Check: 
p.CmdCheck, Del: p.CmdDel, + GC: p.CmdGC, Status: p.CmdStatus, }, version.All, diff --git a/go-controller/pkg/clustermanager/clustermanager_test.go b/go-controller/pkg/clustermanager/clustermanager_test.go index 58e234e67f..eba62f731e 100644 --- a/go-controller/pkg/clustermanager/clustermanager_test.go +++ b/go-controller/pkg/clustermanager/clustermanager_test.go @@ -1649,7 +1649,7 @@ var _ = ginkgo.Describe("Cluster Manager", func() { ) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - const expectedNADContents = `{"cniVersion": "0.4.0", "name": "ovn-kubernetes", "type": "ovn-k8s-cni-overlay"}` + const expectedNADContents = `{"cniVersion": "1.1.0", "name": "ovn-kubernetes", "type": "ovn-k8s-cni-overlay"}` gomega.Expect(nad.Spec.Config).To(gomega.Equal(expectedNADContents)) return nil diff --git a/go-controller/pkg/clustermanager/networkconnect/controller_components_test.go b/go-controller/pkg/clustermanager/networkconnect/controller_components_test.go index 461eda3c40..23cf247401 100644 --- a/go-controller/pkg/clustermanager/networkconnect/controller_components_test.go +++ b/go-controller/pkg/clustermanager/networkconnect/controller_components_test.go @@ -1259,11 +1259,11 @@ func TestNADNeedsUpdate(t *testing.T) { udnOwner := makeUDNOwnerRef("test-udn") makePrimaryNADConfig := func(name string) string { - return fmt.Sprintf(`{"cniVersion": "0.4.0", "name": "%s", "type": "ovn-k8s-cni-overlay", "topology": "layer3", "role": "primary", "netAttachDefName": "test/%s"}`, name, name) + return fmt.Sprintf(`{"cniVersion": "1.1.0", "name": "%s", "type": "ovn-k8s-cni-overlay", "topology": "layer3", "role": "primary", "netAttachDefName": "test/%s"}`, name, name) } makeSecondaryNADConfig := func(name string) string { - return fmt.Sprintf(`{"cniVersion": "0.4.0", "name": "%s", "type": "ovn-k8s-cni-overlay", "topology": "layer3", "netAttachDefName": "test/%s"}`, name, name) + return fmt.Sprintf(`{"cniVersion": "1.1.0", "name": "%s", "type": "ovn-k8s-cni-overlay", "topology": 
"layer3", "netAttachDefName": "test/%s"}`, name, name) } tests := []struct { diff --git a/go-controller/pkg/clustermanager/networkconnect/controller_test.go b/go-controller/pkg/clustermanager/networkconnect/controller_test.go index a712abfe34..8598988e5f 100644 --- a/go-controller/pkg/clustermanager/networkconnect/controller_test.go +++ b/go-controller/pkg/clustermanager/networkconnect/controller_test.go @@ -73,7 +73,7 @@ func newTestUDNNAD(name, namespace, network string, networkID string) *nadv1.Net }, Spec: nadv1.NetworkAttachmentDefinitionSpec{ Config: fmt.Sprintf( - `{"cniVersion": "0.4.0", "name": "%s", "type": "%s", "topology": "layer3", "netAttachDefName": "%s/%s", "role": "primary", "subnets": "10.0.0.0/16/24"}`, + `{"cniVersion": "1.1.0", "name": "%s", "type": "%s", "topology": "layer3", "netAttachDefName": "%s/%s", "role": "primary", "subnets": "10.0.0.0/16/24"}`, network, config.CNI.Plugin, namespace, @@ -601,7 +601,7 @@ var _ = ginkgo.Describe("NetworkConnect ClusterManager Controller Integration Te }, Spec: nadv1.NetworkAttachmentDefinitionSpec{ Config: fmt.Sprintf( - `{"cniVersion": "0.4.0", "name": "%s", "type": "%s", "topology": "layer3", "netAttachDefName": "secondary-ns/cudn-secondary", "subnets": "10.0.0.0/16/24"}`, + `{"cniVersion": "1.1.0", "name": "%s", "type": "%s", "topology": "layer3", "netAttachDefName": "secondary-ns/cudn-secondary", "subnets": "10.0.0.0/16/24"}`, network, config.CNI.Plugin, ), @@ -1204,7 +1204,7 @@ var _ = ginkgo.Describe("NetworkConnect ClusterManager Controller Integration Te }, Spec: nadv1.NetworkAttachmentDefinitionSpec{ // Invalid JSON config - missing required fields, will fail ParseNADInfo - Config: `{"cniVersion": "0.4.0", "name": "malformed", "type": "invalid-type"}`, + Config: `{"cniVersion": "1.1.0", "name": "malformed", "type": "invalid-type"}`, }, } _, err := fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("malformed-ns").Create( diff --git 
a/go-controller/pkg/clustermanager/routeadvertisements/controller_test.go b/go-controller/pkg/clustermanager/routeadvertisements/controller_test.go index 1bad4f1ad5..44a29a4545 100644 --- a/go-controller/pkg/clustermanager/routeadvertisements/controller_test.go +++ b/go-controller/pkg/clustermanager/routeadvertisements/controller_test.go @@ -332,7 +332,7 @@ func (tn testNAD) NAD() *nadtypes.NetworkAttachmentDefinition { // Build the config as a map to properly marshal EVPN config cniConfig := map[string]interface{}{ - "cniVersion": "0.4.0", + "cniVersion": "1.1.0", "name": tn.Network, "type": config.CNI.Plugin, "netAttachDefName": tn.Namespace + "/" + tn.Name, diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go b/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go index 166931625d..7550ab1c0a 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go @@ -458,7 +458,7 @@ var _ = Describe("User Defined Network Controller", func() { nad := testClusterUdnNAD(cudn.Name, nsName) networkName := ovntypes.CUDNPrefix + cudn.Name nadName := nsName + "/" + cudn.Name - nad.Spec.Config = `{"cniVersion":"1.0.0","name":"` + networkName + `","netAttachDefName":"` + nadName + `","role":"","subnets":"10.10.10.0/24","topology":"layer2","type":"ovn-k8s-cni-overlay"}` + nad.Spec.Config = `{"cniVersion":"1.1.0","name":"` + networkName + `","netAttachDefName":"` + nadName + `","role":"","subnets":"10.10.10.0/24","topology":"layer2","type":"ovn-k8s-cni-overlay"}` expectedNsNADs[nsName] = nad } @@ -1319,7 +1319,7 @@ var _ = Describe("User Defined Network Controller", func() { for _, nsName := range testNamespaces { nad := testClusterUdnNAD(cudn.Name, nsName) nadName := nsName + "/" + cudn.Name - nad.Spec.Config = `{"cniVersion":"1.0.0","name":"` + networkName + `","netAttachDefName":"` + nadName + 
`","role":"","subnets":"10.10.10.0/24","topology":"layer2","type":"ovn-k8s-cni-overlay"}` + nad.Spec.Config = `{"cniVersion":"1.1.0","name":"` + networkName + `","netAttachDefName":"` + nadName + `","role":"","subnets":"10.10.10.0/24","topology":"layer2","type":"ovn-k8s-cni-overlay"}` nad.Annotations = map[string]string{ "foo": "bar", ovntypes.OvnNetworkNameAnnotation: networkName, @@ -2505,10 +2505,10 @@ func testEVPNClusterUdnNADWithVIDs(name, namespace, vtepName string, macVID, ipV nad := testClusterUdnNAD(name, namespace) if ipVID > 0 { // Symmetric IRB (both MAC-VRF and IP-VRF) - nad.Spec.Config = fmt.Sprintf(`{"cniVersion":"1.0.0","name":"cluster_udn_%s","type":"ovn-k8s-cni-overlay","netAttachDefName":"%s/%s","topology":"layer2","role":"primary","subnets":"10.10.0.0/16","transport":"evpn","evpn":{"vtep":"%s","macVRF":{"vni":100,"vid":%d},"ipVRF":{"vni":200,"vid":%d}}}`, name, namespace, name, vtepName, macVID, ipVID) + nad.Spec.Config = fmt.Sprintf(`{"cniVersion":"1.1.0","name":"cluster_udn_%s","type":"ovn-k8s-cni-overlay","netAttachDefName":"%s/%s","topology":"layer2","role":"primary","subnets":"10.10.0.0/16","transport":"evpn","evpn":{"vtep":"%s","macVRF":{"vni":100,"vid":%d},"ipVRF":{"vni":200,"vid":%d}}}`, name, namespace, name, vtepName, macVID, ipVID) } else { // MAC-VRF only - nad.Spec.Config = fmt.Sprintf(`{"cniVersion":"1.0.0","name":"cluster_udn_%s","type":"ovn-k8s-cni-overlay","netAttachDefName":"%s/%s","topology":"layer2","role":"primary","subnets":"10.10.0.0/16","transport":"evpn","evpn":{"vtep":"%s","macVRF":{"vni":100,"vid":%d}}}`, name, namespace, name, vtepName, macVID) + nad.Spec.Config = fmt.Sprintf(`{"cniVersion":"1.1.0","name":"cluster_udn_%s","type":"ovn-k8s-cni-overlay","netAttachDefName":"%s/%s","topology":"layer2","role":"primary","subnets":"10.10.0.0/16","transport":"evpn","evpn":{"vtep":"%s","macVRF":{"vni":100,"vid":%d}}}`, name, namespace, name, vtepName, macVID) } return nad } diff --git 
a/go-controller/pkg/clustermanager/userdefinednetwork/nad_test.go b/go-controller/pkg/clustermanager/userdefinednetwork/nad_test.go index 77a03b529d..d841956c91 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/nad_test.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/nad_test.go @@ -91,15 +91,15 @@ var _ = Describe("PrimaryNetAttachDefNotExist", func() { nads := []*netv1.NetworkAttachmentDefinition{ { ObjectMeta: metav1.ObjectMeta{Name: "test-net1", Namespace: "blue"}, - Spec: netv1.NetworkAttachmentDefinitionSpec{Config: `{"cniVersion": "1.0.0","type": "ovn-k8s-cni-overlay","role": "secondary"}`}, + Spec: netv1.NetworkAttachmentDefinitionSpec{Config: `{"cniVersion": "1.1.0","type": "ovn-k8s-cni-overlay","role": "secondary"}`}, }, { ObjectMeta: metav1.ObjectMeta{Name: "test-net2", Namespace: "blue"}, - Spec: netv1.NetworkAttachmentDefinitionSpec{Config: `{"cniVersion": "1.0.0","type": "ovn-k8s-cni-overlay","role": "secondary"}`}, + Spec: netv1.NetworkAttachmentDefinitionSpec{Config: `{"cniVersion": "1.1.0","type": "ovn-k8s-cni-overlay","role": "secondary"}`}, }, { ObjectMeta: metav1.ObjectMeta{Name: "test-net3", Namespace: "blue"}, - Spec: netv1.NetworkAttachmentDefinitionSpec{Config: `{"cniVersion": "1.0.0","type": "fake-ovn-cni","role": "primary"}`}, + Spec: netv1.NetworkAttachmentDefinitionSpec{Config: `{"cniVersion": "1.1.0","type": "fake-ovn-cni","role": "primary"}`}, }, } Expect(PrimaryNetAttachDefNotExist(nads)).To(Succeed()) @@ -108,15 +108,15 @@ var _ = Describe("PrimaryNetAttachDefNotExist", func() { nads := []*netv1.NetworkAttachmentDefinition{ { ObjectMeta: metav1.ObjectMeta{Name: "test-net1", Namespace: "blue"}, - Spec: netv1.NetworkAttachmentDefinitionSpec{Config: `{"cniVersion": "1.0.0","type": "ovn-k8s-cni-overlay","role": "primary"}`}, + Spec: netv1.NetworkAttachmentDefinitionSpec{Config: `{"cniVersion": "1.1.0","type": "ovn-k8s-cni-overlay","role": "primary"}`}, }, { ObjectMeta: metav1.ObjectMeta{Name: "test-net2", 
Namespace: "blue"}, - Spec: netv1.NetworkAttachmentDefinitionSpec{Config: `{"cniVersion": "1.0.0","type": "ovn-k8s-cni-overlay","role": "secondary"}`}, + Spec: netv1.NetworkAttachmentDefinitionSpec{Config: `{"cniVersion": "1.1.0","type": "ovn-k8s-cni-overlay","role": "secondary"}`}, }, { ObjectMeta: metav1.ObjectMeta{Name: "test-net3", Namespace: "blue"}, - Spec: netv1.NetworkAttachmentDefinitionSpec{Config: `{"cniVersion": "1.0.0","type": "fake-ovn-cni","role": "primary"}`}, + Spec: netv1.NetworkAttachmentDefinitionSpec{Config: `{"cniVersion": "1.1.0","type": "fake-ovn-cni","role": "primary"}`}, }, } Expect(PrimaryNetAttachDefNotExist(nads)).ToNot(Succeed()) diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go b/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go index 62850d4a23..72ee497fa6 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go @@ -24,8 +24,6 @@ const ( FinalizerUserDefinedNetwork = "k8s.ovn.org/user-defined-network-protection" LabelUserDefinedNetwork = "k8s.ovn.org/user-defined-network" - - cniVersion = "1.0.0" ) type SpecGetter interface { @@ -139,7 +137,7 @@ func validateTopology(spec SpecGetter) error { func renderCNINetworkConfig(networkName, nadName string, spec SpecGetter, opts *RenderOptions) (map[string]interface{}, error) { netConfSpec := &ovncnitypes.NetConf{ NetConf: cnitypes.NetConf{ - CNIVersion: cniVersion, + CNIVersion: config.CNISpecVersion, Type: OvnK8sCNIOverlay, Name: networkName, }, @@ -224,7 +222,7 @@ func renderCNINetworkConfig(networkName, nadName string, spec SpecGetter, opts * // Generating the net-conf JSON string using 'map[string]struct{}' provide the // expected result. 
cniNetConf := map[string]interface{}{ - "cniVersion": cniVersion, + "cniVersion": config.CNISpecVersion, "type": OvnK8sCNIOverlay, "name": networkName, "netAttachDefName": nadName, diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template_test.go b/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template_test.go index 5881617c6b..0967125e02 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template_test.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template_test.go @@ -352,7 +352,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "type": "ovn-k8s-cni-overlay", "name": "mynamespace_test-net", "netAttachDefName": "mynamespace/test-net", @@ -376,7 +376,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "type": "ovn-k8s-cni-overlay", "name": "mynamespace_test-net", "netAttachDefName": "mynamespace/test-net", @@ -403,7 +403,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "type": "ovn-k8s-cni-overlay", "name": "mynamespace_test-net", "netAttachDefName": "mynamespace/test-net", @@ -429,7 +429,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "type": "ovn-k8s-cni-overlay", "name": "mynamespace_test-net", "netAttachDefName": "mynamespace/test-net", @@ -490,7 +490,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "type": "ovn-k8s-cni-overlay", "name": "cluster_udn_test-net", "netAttachDefName": "mynamespace/test-net", @@ -514,7 +514,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "type": "ovn-k8s-cni-overlay", "name": "cluster_udn_test-net", 
"netAttachDefName": "mynamespace/test-net", @@ -541,7 +541,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "type": "ovn-k8s-cni-overlay", "name": "cluster_udn_test-net", "netAttachDefName": "mynamespace/test-net", @@ -567,7 +567,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "type": "ovn-k8s-cni-overlay", "name": "cluster_udn_test-net", "netAttachDefName": "mynamespace/test-net", @@ -594,7 +594,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "type": "ovn-k8s-cni-overlay", "name": "cluster_udn_test-net", "netAttachDefName": "mynamespace/test-net", @@ -623,7 +623,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "type": "ovn-k8s-cni-overlay", "name": "cluster_udn_test-net", "netAttachDefName": "mynamespace/test-net", @@ -655,7 +655,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "type": "ovn-k8s-cni-overlay", "name": "cluster_udn_test-net", "netAttachDefName": "mynamespace/test-net", @@ -695,7 +695,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "type": "ovn-k8s-cni-overlay", "name": "cluster_udn_test-net", "netAttachDefName": "mynamespace/test-net", @@ -736,7 +736,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "type": "ovn-k8s-cni-overlay", "name": "cluster_udn_test-net", "netAttachDefName": "mynamespace/test-net", @@ -778,7 +778,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "type": "ovn-k8s-cni-overlay", "name": "cluster_udn_test-net", "netAttachDefName": "mynamespace/test-net", diff --git a/go-controller/pkg/cni/cniserver_test.go 
b/go-controller/pkg/cni/cniserver_test.go index 484f6cfe52..cb0507a323 100644 --- a/go-controller/pkg/cni/cniserver_test.go +++ b/go-controller/pkg/cni/cniserver_test.go @@ -167,7 +167,7 @@ func TestCNIServer(t *testing.T) { Config: []byte(cniConfig), DeviceInfo: nadapi.DeviceInfo{ Type: "vdpa", - Version: "1.0.0", + Version: "1.1.0", Vdpa: &nadapi.VdpaDevice{ ParentDevice: "vdpa:0000:65:00.3", Driver: "vhost", diff --git a/go-controller/pkg/cni/cnishim.go b/go-controller/pkg/cni/cnishim.go index 7bfbfdbe0c..4828eb2ee0 100644 --- a/go-controller/pkg/cni/cnishim.go +++ b/go-controller/pkg/cni/cnishim.go @@ -366,6 +366,28 @@ func (p *Plugin) CmdStatus(args *skel.CmdArgs) error { return err } +// CmdGC is the callback for runtime garbage collection. +func (p *Plugin) CmdGC(args *skel.CmdArgs) error { + var err error + + startTime := time.Now() + defer func() { + p.postMetrics(startTime, CNIGC, err) + if err != nil { + klog.Errorf("Error on CmdGC: %v", err) + } + }() + + conf, err := config.ReadCNIConfig(args.StdinData) + if err != nil { + return err + } + setupLogging(conf) + + // OVN-Kubernetes does not maintain independent local plugin state that needs GC. + return nil +} + // CmdCheck is the callback for 'checking' container's networking is as expected. 
func (p *Plugin) CmdCheck(_ *skel.CmdArgs) error { // noop...CMD check is not considered useful, and has a considerable performance impact diff --git a/go-controller/pkg/cni/cnishim_test.go b/go-controller/pkg/cni/cnishim_test.go index 4793feeec5..633b7093fa 100644 --- a/go-controller/pkg/cni/cnishim_test.go +++ b/go-controller/pkg/cni/cnishim_test.go @@ -49,7 +49,7 @@ func TestCmdAdd_PrivilegedMode(t *testing.T) { }() args := &skel.CmdArgs{ - StdinData: []byte(`{"cniVersion":"1.0.0","name":"mynet","type":"ovn-k8s-cni-overlay"}`), + StdinData: []byte(`{"cniVersion":"1.1.0","name":"mynet","type":"ovn-k8s-cni-overlay"}`), ContainerID: "cid", Netns: "/var/run/netns/test", IfName: "eth0", @@ -66,7 +66,7 @@ func TestCmdAdd_PrivilegedMode(t *testing.T) { } expected := `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "interfaces": [ { "name": "serverWired" @@ -156,7 +156,7 @@ func TestCmdAdd_UnprivilegedMode(t *testing.T) { }() args := &skel.CmdArgs{ - StdinData: []byte(`{"cniVersion":"1.0.0","name":"mynet","type":"ovn-k8s-cni-overlay"}`), + StdinData: []byte(`{"cniVersion":"1.1.0","name":"mynet","type":"ovn-k8s-cni-overlay"}`), ContainerID: "cid", Netns: "/var/run/netns/test", IfName: "eth0", @@ -178,7 +178,7 @@ func TestCmdAdd_UnprivilegedMode(t *testing.T) { // Expected output includes both interfaces wired by CNIShim expected := `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "interfaces": [ { "name": "eth0", @@ -232,7 +232,7 @@ func TestCmdDel_PrivilegedMode(t *testing.T) { } args := &skel.CmdArgs{ - StdinData: []byte(`{"cniVersion":"1.0.0","name":"mynet","type":"ovn-k8s-cni-overlay"}`), + StdinData: []byte(`{"cniVersion":"1.1.0","name":"mynet","type":"ovn-k8s-cni-overlay"}`), ContainerID: "cid", Netns: "/var/run/netns/test", IfName: "eth0", @@ -277,7 +277,7 @@ func TestCmdDel_UnprivilegedMode(t *testing.T) { } args := &skel.CmdArgs{ - StdinData: []byte(`{"cniVersion":"1.0.0","name":"mynet","type":"ovn-k8s-cni-overlay"}`), + StdinData: 
[]byte(`{"cniVersion":"1.1.0","name":"mynet","type":"ovn-k8s-cni-overlay"}`), ContainerID: "cid", Netns: "/var/run/netns/test", IfName: "eth0", @@ -291,6 +291,16 @@ func TestCmdDel_UnprivilegedMode(t *testing.T) { }) } +func TestCmdGC(t *testing.T) { + p := &Plugin{} + args := &skel.CmdArgs{ + StdinData: []byte(`{"cniVersion":"1.1.0","name":"mynet","type":"ovn-k8s-cni-overlay"}`), + } + + err := p.CmdGC(args) + require.NoError(t, err) +} + func withCNIEnv(t *testing.T, fn func()) { t.Helper() diff --git a/go-controller/pkg/cni/helper_linux_test.go b/go-controller/pkg/cni/helper_linux_test.go index 0b99996770..8e1221ed93 100644 --- a/go-controller/pkg/cni/helper_linux_test.go +++ b/go-controller/pkg/cni/helper_linux_test.go @@ -1240,7 +1240,7 @@ func TestPodRequest_deletePodConntrack(t *testing.T) { }, }, inpPrevResult: ¤t.Result{ - CNIVersion: "1.0.0", + CNIVersion: "1.1.0", Interfaces: []*current.Interface{{Name: "eth0"}}, IPs: []*current.IPConfig{{Interface: &[]int{0}[0], Address: *ovntest.MustParseIPNet("192.168.1.15/24"), Gateway: ovntest.MustParseIP("192.168.1.1")}}, }, @@ -1255,7 +1255,7 @@ func TestPodRequest_deletePodConntrack(t *testing.T) { }, }, inpPrevResult: ¤t.Result{ - CNIVersion: "1.0.0", + CNIVersion: "1.1.0", Interfaces: []*current.Interface{{Name: "eth0", Sandbox: "blah"}}, IPs: []*current.IPConfig{{Interface: &[]int{0}[0], Address: *ovntest.MustParseIPNet("192.168.1.15/24"), Gateway: ovntest.MustParseIP("192.168.1.1")}}, }, diff --git a/go-controller/pkg/cni/types.go b/go-controller/pkg/cni/types.go index aaeec4ee5d..22866aa193 100644 --- a/go-controller/pkg/cni/types.go +++ b/go-controller/pkg/cni/types.go @@ -84,6 +84,9 @@ const CNICheck command = "CHECK" // CNIStatus is the command representing a plugin readiness check const CNIStatus command = "STATUS" +// CNIGC is the command representing CNI runtime garbage collection +const CNIGC command = "GC" + // Request sent to the Server by the OVN CNI plugin type Request struct { // CNI environment 
variables, like CNI_COMMAND and CNI_NETNS diff --git a/go-controller/pkg/config/cni.go b/go-controller/pkg/config/cni.go index 3bec2d286f..1038b4ac8d 100644 --- a/go-controller/pkg/config/cni.go +++ b/go-controller/pkg/config/cni.go @@ -19,13 +19,16 @@ import ( var ErrorAttachDefNotOvnManaged = errors.New("net-attach-def not managed by OVN") var ErrorChainingNotSupported = errors.New("CNI plugin chaining is not supported") +// CNISpecVersion is the CNI spec version used when OVN-Kubernetes renders CNI config. +const CNISpecVersion = "1.1.0" + // WriteCNIConfig writes a CNI JSON config file to directory given by global config // if the file doesn't already exist, or is different than the content that would // be written. func WriteCNIConfig() error { netConf := &ovncnitypes.NetConf{ NetConf: types.NetConf{ - CNIVersion: "0.4.0", + CNIVersion: CNISpecVersion, Name: "ovn-kubernetes", Type: CNI.Plugin, }, diff --git a/go-controller/pkg/ovn/controller/networkconnect/controller_components_test.go b/go-controller/pkg/ovn/controller/networkconnect/controller_components_test.go index 3a08c31901..070f4c19a3 100644 --- a/go-controller/pkg/ovn/controller/networkconnect/controller_components_test.go +++ b/go-controller/pkg/ovn/controller/networkconnect/controller_components_test.go @@ -519,7 +519,7 @@ func TestController_syncNAD(t *testing.T) { context.Background(), cnc, metav1.CreateOptions{}) g.Expect(err).ToNot(gomega.HaveOccurred()) - nadConfig := `{"cniVersion":"0.4.0","name":"net1","type":"ovn-k8s-cni-overlay","topology":"layer3","role":"primary","netAttachDefName":"ns1/nad1"}` + nadConfig := `{"cniVersion":"1.1.0","name":"net1","type":"ovn-k8s-cni-overlay","topology":"layer3","role":"primary","netAttachDefName":"ns1/nad1"}` nad := &nettypes.NetworkAttachmentDefinition{ ObjectMeta: metav1.ObjectMeta{ Namespace: "ns1", diff --git a/go-controller/pkg/testing/util.go b/go-controller/pkg/testing/util.go index 0a49731b94..7ac3eff77f 100644 --- 
a/go-controller/pkg/testing/util.go +++ b/go-controller/pkg/testing/util.go @@ -19,7 +19,7 @@ func GenerateNAD(networkName, name, namespace, topology, cidr, role string) *nad return GenerateNADWithConfig(name, namespace, fmt.Sprintf( ` { - "cniVersion": "0.4.0", + "cniVersion": "1.1.0", "name": %q, "type": "ovn-k8s-cni-overlay", "topology":%q, @@ -48,7 +48,7 @@ func GenerateNADWithoutMTU(networkName, name, namespace, topology, cidr, role st return GenerateNADWithConfig(name, namespace, fmt.Sprintf( ` { - "cniVersion": "0.4.0", + "cniVersion": "1.1.0", "name": %q, "type": "ovn-k8s-cni-overlay", "topology":%q, diff --git a/go-controller/pkg/util/multi_network_test.go b/go-controller/pkg/util/multi_network_test.go index 2861650ba8..14071ec99e 100644 --- a/go-controller/pkg/util/multi_network_test.go +++ b/go-controller/pkg/util/multi_network_test.go @@ -317,7 +317,7 @@ func TestParseNetconf(t *testing.T) { inputNetAttachDefConfigSpec: ` { "name": "tenantred", - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "plugins": [ { "type": "ovn-k8s-cni-overlay", @@ -333,7 +333,7 @@ func TestParseNetconf(t *testing.T) { NADName: "ns1/nad1", MTU: 1400, VLANID: 10, - NetConf: cnitypes.NetConf{Name: "tenantred", CNIVersion: "1.0.0", Type: "ovn-k8s-cni-overlay"}, + NetConf: cnitypes.NetConf{Name: "tenantred", CNIVersion: "1.1.0", Type: "ovn-k8s-cni-overlay"}, }, }, { diff --git a/go-controller/pkg/util/nad.go b/go-controller/pkg/util/nad.go index 3a220e2b82..d80b56c7cd 100644 --- a/go-controller/pkg/util/nad.go +++ b/go-controller/pkg/util/nad.go @@ -35,7 +35,7 @@ func EnsureDefaultNetworkNAD(nadLister nadlisters.NetworkAttachmentDefinitionLis Namespace: config.Kubernetes.OVNConfigNamespace, }, Spec: nadtypes.NetworkAttachmentDefinitionSpec{ - Config: fmt.Sprintf("{\"cniVersion\": \"0.4.0\", \"name\": \"ovn-kubernetes\", \"type\": \"%s\"}", config.CNI.Plugin), + Config: fmt.Sprintf("{\"cniVersion\": \"%s\", \"name\": \"ovn-kubernetes\", \"type\": \"%s\"}", config.CNISpecVersion, 
config.CNI.Plugin), }, }, // note we don't set ourselves as field manager for this create as we diff --git a/test/e2e/network_segmentation.go b/test/e2e/network_segmentation.go index 0aefc2236c..75bd777990 100644 --- a/test/e2e/network_segmentation.go +++ b/test/e2e/network_segmentation.go @@ -47,6 +47,7 @@ import ( const openDefaultPortsAnnotation = "k8s.ovn.org/open-default-ports" const RequiredUDNNamespaceLabel = "k8s.ovn.org/primary-user-defined-network" const OvnPodAnnotationName = "k8s.ovn.org/pod-networks" +const expectedUDNCNIVersion = "1.1.0" var _ = Describe("Network Segmentation", feature.NetworkSegmentation, func() { f := wrappedTestFramework("network-segmentation") @@ -2093,7 +2094,7 @@ func assertL2SecondaryNetAttachDefManifest(nadClient nadclient.K8sCniCncfIoV1Int expectedNetworkName := namespace + "_" + udnName expectedNadName := namespace + "/" + udnName ExpectWithOffset(1, nad.Spec.Config).To(MatchJSON(`{ - "cniVersion":"1.0.0", + "cniVersion":"` + expectedUDNCNIVersion + `", "type": "ovn-k8s-cni-overlay", "name": "` + expectedNetworkName + `", "netAttachDefName": "` + expectedNadName + `", @@ -2158,7 +2159,7 @@ func assertClusterNADManifest(nadClient nadclient.K8sCniCncfIoV1Interface, names expectedNetworkName := "cluster_udn_" + udnName expectedNadName := namespace + "/" + udnName ExpectWithOffset(1, nad.Spec.Config).To(MatchJSON(`{ - "cniVersion":"1.0.0", + "cniVersion":"` + expectedUDNCNIVersion + `", "type": "ovn-k8s-cni-overlay", "name": "` + expectedNetworkName + `", "netAttachDefName": "` + expectedNadName + `", From d92494bc799604aa756f56f90a191e7ffb777b22 Mon Sep 17 00:00:00 2001 From: origin-release-container Date: Fri, 27 Feb 2026 05:02:56 +0000 Subject: [PATCH 58/59] sync test annotations with upstream changes - go mod vendor - ./openshift/hack/update-tests-annotation.sh Automated sync after downstream merge to keep test annotations in sync with upstream test modifications and rules.go changes. 
--- .../generated/zz_generated.annotations.go | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/openshift/test/generated/zz_generated.annotations.go b/openshift/test/generated/zz_generated.annotations.go index 3ec39fa006..1cd08beb94 100644 --- a/openshift/test/generated/zz_generated.annotations.go +++ b/openshift/test/generated/zz_generated.annotations.go @@ -1141,12 +1141,16 @@ var AppendedAnnotations = map[string]string{ "Multicast when multicast enabled for namespace should be able to send multicast UDP traffic between nodes": "[Disabled:Unimplemented]", + "Network Policy: ICMP bypass allows ICMP between pods with default deny policy on the default network": "[Disabled:Unimplemented]", + "Network Segmentation ClusterUserDefinedNetwork CRD Controller pod connected to ClusterUserDefinedNetwork CR & managed NADs cannot be deleted when being used": "[Suite:openshift/conformance/parallel]", "Network Segmentation ClusterUserDefinedNetwork CRD Controller should create NAD according to spec in each target namespace and report active namespaces": "[Suite:openshift/conformance/parallel]", "Network Segmentation ClusterUserDefinedNetwork CRD Controller should create NAD in new created namespaces that apply to namespace-selector": "[Suite:openshift/conformance/parallel]", + "Network Segmentation ClusterUserDefinedNetwork CRD Controller should delete NAD when target namespace is terminating": "[Suite:openshift/conformance/parallel]", + "Network Segmentation ClusterUserDefinedNetwork CRD Controller when CR is deleted, should delete all managed NAD in each target namespace": "[Suite:openshift/conformance/parallel]", "Network Segmentation ClusterUserDefinedNetwork CRD Controller when namespace-selector is mutated should create NAD in namespaces that apply to mutated namespace-selector": "[Suite:openshift/conformance/parallel]", @@ -1337,6 +1341,10 @@ var AppendedAnnotations = map[string]string{ "Network Segmentation: Localnet using ClusterUserDefinedNetwork 
CR, pods in different namespaces, should communicate over localnet topology": "[Disabled:Unimplemented]", + "Network Segmentation: Network Policies on a user defined primary network ICMP should bypass default deny policy for UDNs when enabled in L2 dualstack primary UDN": "[Suite:openshift/conformance/parallel]", + + "Network Segmentation: Network Policies on a user defined primary network ICMP should bypass default deny policy for UDNs when enabled in L3 dualstack primary UDN": "[Suite:openshift/conformance/parallel]", + "Network Segmentation: Network Policies on a user defined primary network allow ingress traffic to one pod from a particular namespace in L2 primary UDN": "[Disabled:Unimplemented]", "Network Segmentation: Network Policies on a user defined primary network allow ingress traffic to one pod from a particular namespace in L3 primary UDN": "[Disabled:Unimplemented]", @@ -1365,6 +1373,8 @@ var AppendedAnnotations = map[string]string{ "Network Segmentation: Preconfigured Layer2 UDN unmasked reserved / infrastructure subnets are not allowed Layer2 with unmasked IPv6 reserved subnets": "[Suite:openshift/conformance/parallel]", + "Network Segmentation: integration should recover ovnkube pods after restart with primary and secondary UDN resources": "[Suite:openshift/conformance/parallel]", + "Network Segmentation: services on a user defined primary network should be reachable through their cluster IP, node port and load balancer L2 primary UDN with custom network, cluster-networked pods, NodePort service": "[Suite:openshift/conformance/parallel]", "Network Segmentation: services on a user defined primary network should be reachable through their cluster IP, node port and load balancer L2 primary UDN, cluster-networked pods, NodePort service": "[Disabled:Unimplemented]", @@ -1421,6 +1431,8 @@ var AppendedAnnotations = map[string]string{ "Services does not use host masquerade address as source IP address when communicating externally": 
"[Disabled:Unimplemented]", + "Services of type NodePort should be able to preserve UDP traffic when server pod cycles for a NodePort service via a different node": "[Disabled:Unimplemented]", + "Services of type NodePort should handle IP fragments": "[Disabled:Unimplemented]", "Services of type NodePort should listen on each host addresses": "[Disabled:Unimplemented]", @@ -1551,6 +1563,10 @@ var AppendedAnnotations = map[string]string{ "e2e delete databases recovering from deleting db files while maintaining connectivity when deleting both db files on ovnkube-db-2": "[Disabled:Unimplemented]", + "e2e egress IP validation Cluster Default Network Should fail if egressip-mark annotation is being added by a regular user": "[Disabled:Unimplemented]", + + "e2e egress IP validation Cluster Default Network Should fail if egressip-mark annotation is present during EgressIP creation": "[Disabled:Unimplemented]", + "e2e egress IP validation Cluster Default Network Should handle EIP reassignment correctly on namespace and pod label updates, and EIP object updates": "[Disabled:Unimplemented]", "e2e egress IP validation Cluster Default Network Should re-assign egress IPs when node readiness / reachability goes down/up": "[Disabled:Unimplemented]", @@ -1591,6 +1607,10 @@ var AppendedAnnotations = map[string]string{ "e2e egress IP validation Cluster Default Network of replies to egress IP packets that require fragmentation [LGW][IPv4]": "[Disabled:Unimplemented]", + "e2e egress IP validation Network Segmentation: IPv4 L2 role primary Should fail if egressip-mark annotation is being added by a regular user": "[Disabled:Unimplemented]", + + "e2e egress IP validation Network Segmentation: IPv4 L2 role primary Should fail if egressip-mark annotation is present during EgressIP creation": "[Disabled:Unimplemented]", + "e2e egress IP validation Network Segmentation: IPv4 L2 role primary Should handle EIP reassignment correctly on namespace and pod label updates, and EIP object updates": 
"[Disabled:Unimplemented]", "e2e egress IP validation Network Segmentation: IPv4 L2 role primary Should re-assign egress IPs when node readiness / reachability goes down/up": "[Disabled:Unimplemented]", @@ -1631,6 +1651,10 @@ var AppendedAnnotations = map[string]string{ "e2e egress IP validation Network Segmentation: IPv4 L2 role primary of replies to egress IP packets that require fragmentation [LGW][IPv4]": "[Disabled:Unimplemented]", + "e2e egress IP validation Network Segmentation: IPv4 L3 role primary Should fail if egressip-mark annotation is being added by a regular user": "[Disabled:Unimplemented]", + + "e2e egress IP validation Network Segmentation: IPv4 L3 role primary Should fail if egressip-mark annotation is present during EgressIP creation": "[Disabled:Unimplemented]", + "e2e egress IP validation Network Segmentation: IPv4 L3 role primary Should handle EIP reassignment correctly on namespace and pod label updates, and EIP object updates": "[Disabled:Unimplemented]", "e2e egress IP validation Network Segmentation: IPv4 L3 role primary Should re-assign egress IPs when node readiness / reachability goes down/up": "[Disabled:Unimplemented]", @@ -1671,6 +1695,10 @@ var AppendedAnnotations = map[string]string{ "e2e egress IP validation Network Segmentation: IPv4 L3 role primary of replies to egress IP packets that require fragmentation [LGW][IPv4]": "[Disabled:Unimplemented]", + "e2e egress IP validation Network Segmentation: IPv6 L2 role primary Should fail if egressip-mark annotation is being added by a regular user": "[Disabled:Unimplemented]", + + "e2e egress IP validation Network Segmentation: IPv6 L2 role primary Should fail if egressip-mark annotation is present during EgressIP creation": "[Disabled:Unimplemented]", + "e2e egress IP validation Network Segmentation: IPv6 L2 role primary Should handle EIP reassignment correctly on namespace and pod label updates, and EIP object updates": "[Disabled:Unimplemented]", "e2e egress IP validation Network 
Segmentation: IPv6 L2 role primary Should re-assign egress IPs when node readiness / reachability goes down/up": "[Disabled:Unimplemented]", @@ -1711,6 +1739,10 @@ var AppendedAnnotations = map[string]string{ "e2e egress IP validation Network Segmentation: IPv6 L2 role primary of replies to egress IP packets that require fragmentation [LGW][IPv4]": "[Disabled:Unimplemented]", + "e2e egress IP validation Network Segmentation: IPv6 L3 role primary Should fail if egressip-mark annotation is being added by a regular user": "[Disabled:Unimplemented]", + + "e2e egress IP validation Network Segmentation: IPv6 L3 role primary Should fail if egressip-mark annotation is present during EgressIP creation": "[Disabled:Unimplemented]", + "e2e egress IP validation Network Segmentation: IPv6 L3 role primary Should handle EIP reassignment correctly on namespace and pod label updates, and EIP object updates": "[Disabled:Unimplemented]", "e2e egress IP validation Network Segmentation: IPv6 L3 role primary Should re-assign egress IPs when node readiness / reachability goes down/up": "[Disabled:Unimplemented]", From eafe465f549bde252410b7f22cb2aaa48780b97e Mon Sep 17 00:00:00 2001 From: Jamo Luhrsen Date: Thu, 26 Feb 2026 21:15:00 -0800 Subject: [PATCH 59/59] add ListNetworks() method to openshift infra provider an upstream commit [0] added the ListNetworks() method to the test infraprovider api interface. the openshift infraprovider downstream only needs to also implement it. For now it's just a stub with panic() like others that still need work. we need it or the test extension binary build fails which is part of the main Dockerfile build. 
[0] https://github.com/ovn-kubernetes/ovn-kubernetes/pull/5931/commits/2b1f4a849975efd23ebc7a585687156fc6924028 Signed-off-by: Jamo Luhrsen --- openshift/test/infraprovider/openshift.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/openshift/test/infraprovider/openshift.go b/openshift/test/infraprovider/openshift.go index ab4bf119a7..cb3920a580 100644 --- a/openshift/test/infraprovider/openshift.go +++ b/openshift/test/infraprovider/openshift.go @@ -153,6 +153,10 @@ func (c *contextOpenshift) GetExternalContainerLogs(container api.ExternalContai panic("not implemented") } +func (o openshift) ListNetworks() ([]string, error) { + panic("not implemented") +} + func (c contextOpenshift) CreateNetwork(name string, subnets ...string) (api.Network, error) { panic("not implemented") }