diff --git a/dist/templates/k8s.ovn.org_clusteruserdefinednetworks.yaml.j2 b/dist/templates/k8s.ovn.org_clusteruserdefinednetworks.yaml.j2 index 88901528d4..36c7f050fd 100644 --- a/dist/templates/k8s.ovn.org_clusteruserdefinednetworks.yaml.j2 +++ b/dist/templates/k8s.ovn.org_clusteruserdefinednetworks.yaml.j2 @@ -313,6 +313,14 @@ spec: rule: '!has(self.infrastructureSubnets) || !has(self.reservedSubnets) || self.infrastructureSubnets.all(infra, !self.reservedSubnets.exists(reserved, cidr(infra).containsCIDR(reserved) || cidr(reserved).containsCIDR(infra)))' + - message: infrastructureSubnets must be a masked network address + (no host bits set) + rule: '!has(self.infrastructureSubnets) || self.infrastructureSubnets.all(s, + isCIDR(s) && cidr(s) == cidr(s).masked())' + - message: reservedSubnets must be a masked network address (no + host bits set) + rule: '!has(self.reservedSubnets) || self.reservedSubnets.all(s, + isCIDR(s) && cidr(s) == cidr(s).masked())' layer3: description: Layer3 is the Layer3 topology configuration. properties: diff --git a/dist/templates/k8s.ovn.org_userdefinednetworks.yaml.j2 b/dist/templates/k8s.ovn.org_userdefinednetworks.yaml.j2 index 9b855382a9..073c64bac0 100644 --- a/dist/templates/k8s.ovn.org_userdefinednetworks.yaml.j2 +++ b/dist/templates/k8s.ovn.org_userdefinednetworks.yaml.j2 @@ -257,6 +257,14 @@ spec: rule: '!has(self.infrastructureSubnets) || !has(self.reservedSubnets) || self.infrastructureSubnets.all(infra, !self.reservedSubnets.exists(reserved, cidr(infra).containsCIDR(reserved) || cidr(reserved).containsCIDR(infra)))' + - message: infrastructureSubnets must be a masked network address + (no host bits set) + rule: '!has(self.infrastructureSubnets) || self.infrastructureSubnets.all(s, + isCIDR(s) && cidr(s) == cidr(s).masked())' + - message: reservedSubnets must be a masked network address (no host + bits set) + rule: '!has(self.reservedSubnets) || self.reservedSubnets.all(s, + isCIDR(s) && cidr(s) == cidr(s).masked())' layer3: description: Layer3 is the Layer3 topology configuration. properties: diff --git a/docs/okeps/okep-5494-ovn-kubernetes-mcp-server.md b/docs/okeps/okep-5494-ovn-kubernetes-mcp-server.md new file mode 100644 index 0000000000..cffe1970d5 --- /dev/null +++ b/docs/okeps/okep-5494-ovn-kubernetes-mcp-server.md @@ -0,0 +1,798 @@ +# OKEP-5494: Model Context Protocol for Troubleshooting OVN-Kubernetes + +# Problem Statement + +Diagnosing an issue in [OVN-Kubernetes](https://ovn-kubernetes.io/) network +plugin is complex because it has many layers (Kubernetes, OVN-Kubernetes, OVN, +OpenvSwitch, Kernel - especially Netfilter elements). Usually the person +troubleshooting an issue has to approach it in a layered fashion and has +to be fully aware of all the debugging tools each layer has to offer to +then be able to pin point where the problem is. That is time consuming +and requires years of expertise on each depth of the stack and across all +features. Sometimes it also involves working with the layered community +project teams like OVN and OVS since they hold more knowledge about their +domain than engineers working on the OVN-Kubernetes plugin. Each +troubleshooting session to understand where the packet is getting +blackholed or dropped takes a lot of time to solve (unless it's trivial). 
+
# Goals

The goal of this enhancement is to try to improve "troubleshooting time" and
"tool usage/typing time", and to erase the need to know each of those tools
down to the parameter level, by exposing these tools using
[Model Context Protocol](https://modelcontextprotocol.io/docs/learn/architecture#overview) (MCP)
and leveraging the backend model's (Claude Sonnet 4, Gemini 2.5 Pro, GPT-5, etc.)
knowledge and context to troubleshoot issues on live clusters or locally on
containers with end users' databases loaded. This can go a long
way in speeding up bug triages.

* Phase 1 targets only ovn-kubernetes community members as the audience
* Build an OVN-Kubernetes MCP Server(s) that exposes all the tools required
  to troubleshoot the OVN-Kubernetes network plugin
  * This MCP Server(s) must also be aware of where to run these tools to
    troubleshoot the issue in question on a cluster (i.e., which node? which
    pod? which container?)
* Add support to load an MCP Server not just against a live cluster
  but also against a simulated environment constructed from an information
  bundle or other debugging data extracted from a live cluster.
  Examples are [must-gather](https://github.com/openshift/must-gather) and
  [sos-reports](https://github.com/sosreport/sos) and how tools like
  [omc](https://github.com/gmeghnag/omc) or [xsos](https://github.com/ryran/xsos)
  can be leveraged
* Ensure that the tools we expose have read-only permissions. Whether this means
  we restrict the tools themselves to read rights only OR expose only read actions
  via the tools is discussed in the proposal section.
* Support troubleshooting all features of OVN-Kubernetes
  * We must have extensive failure scenario and chaos tests injected to see
    how well the LLM is able to troubleshoot
  * We must form a list of all common ovn-kubernetes bugs we have hit across
    features (for example, all possible ways of running into staleSNATs)
  * We must add benchmarking tests for evaluating the quality of troubleshooting
    specific to CNI networking.

# Future Goals

* Phase 2 targets end users and others using OVN-Kubernetes to troubleshoot
  issues
* Expand the MCP server to not just provide tools but also relevant context
  around the OVN-Kubernetes implementation
* Create a RAG system with all internal docs and integrate it with
  the prompt client for better results (this would require improving our docs
  first). This is especially important for the LLM to know how OVN-Kubernetes
  implements each traffic flow and feature and what constructs are created
  in the layers underneath. This might not be needed for older features
  like EgressIP that have plenty of online resources, but for newer features
  like RouteAdvertisements the LLM may not know much without that
  extra context.
* Investigate if there is potential for converting it also into a remediation
  system (this would need write access)
* Work with the layered project community teams (OVN, OpenvSwitch, Kernel)
  for each of those layers to also own an MCP server, since they know their
  stack better. So instead of one OVN-Kubernetes MCP Server it would be a
  set of servers, each owned by a specific upstream community project, for
  better maintenance and ownership
* The same MCP Server could also potentially be used as an
  "OVN-Kubernetes network plugin - know it better" chatbot, but that is not
  the initial goal here.
+
# Future-Stretch-Goals

* Phase 3 includes this getting productized and shipped to end users of
  OVN-Kubernetes, in some far-fetched future, to run in production for
  troubleshooting. But this would require:
  * Having a better overall architecture for a holistic agentic AI
    troubleshooting solution on a cluster
  * Solving the compute problem of how and where to run the model
  * Having airtight security vetting.

  Running this stack in production is out-of-scope for this OKEP; it is a
  future-stretch goal contingent on security and testing milestones.

# Non-Goals

* We will not be solving the problem of which LLM should be used. Most of
  the good ones (based on community member experience) are proprietary (Claude
  Sonnet 4 and Gemini 2.5 Pro), but testing all LLMs and determining which
  works best is not in scope
  * By not solving this problem as part of this enhancement, we risk having
    to deal with "long-context windows causing hallucinations", but that's
    where the RAG system mentioned in the future goals could help.
* We will also not be developing our own model to train and maintain
  * By not solving this problem, we also risk relying on proprietary models
    knowing and learning what we want them to learn while having no control
    over how fast they learn it. Again, RAG could help here.
  * So the quality here will heavily depend on how good the underlying LLM
    is, which is largely outside our control.

# Introduction

An engineer troubleshooting OVN-Kubernetes usually uses the following set of
CLI tools in a layered fashion:

* Kubernetes and OVN-Kubernetes layer - this cluster state information is
  gathered as part of must-gather for offline troubleshooting
  * **kubectl** commands like list, get, describe, logs, exec, events to know
    everything about the Kubernetes API state of the feature and to know what
    the ovnkube pods were doing through the logs they generate during that
    window
  * **ovnkube-trace**, which executes ovn-trace, ovs ofproto trace and detrace
    commands (this tool doesn't support all scenarios - it's not well
    maintained)
  * Future-tool - ovnkube CLI (need to ask NVIDIA what's the status here)
  * A place for a K8s state-database syncer should/could potentially exist
* OVN Layer - OVN databases are gathered as part of must-gather for
  offline troubleshooting
  * **ovn-nbctl** commands that are executed on the ovnkube node pods to
    understand what OVN-Kubernetes created in the northbound database via
    libovsdbclient transactions
  * **ovn-sbctl** commands that are executed on the ovnkube node pods to
    understand what northd created in the southbound database
  * **ovn-trace and detrace** commands that are executed on the ovnkube node
    pods to simulate packet flow tracing based on flows in the
    southbound database
  * **ovn-appctl -t ovn-controller ct-zone-list** to list the conntrack
    zone to OVN construct mapping, for a better understanding of how the
    conntrack commit of the packets happened
* OpenvSwitch Layer - the OpenvSwitch database is usually gathered as part
  of sos-report for offline troubleshooting
  * **ovs-ofctl dump-flows** to debug especially the breth0 openflows
    that OVN-Kubernetes creates on the gateway of each node
  * **ovs-appctl dpctl/dump-flows** to trace live packets, run on a specific
    node's ovs container (KIND) or on the node (OpenShift)
  * **ovs-appctl ofproto/trace** and detrace to run an ovs trace of the
    packet based on the openflows
  * **ovs-appctl dpctl/dump-conntrack** to
know all the conntrack zones used
    for a specific connection
  * **ovs-vsctl** commands to list interfaces and bridges
  * **retis** to see packet drops in ovs
* Netfilter/Kernel Layer - this information is usually gathered as part
  of sos-report for offline troubleshooting
  * **ip util commands** like **ip r** or **ip a** or **ip rule list** for
    debugging VRFs, BGP-learnt routes, the routes OVN-Kubernetes creates
    on the node, or custom routes that end users add
  * **nft list ruleset** to understand what rules were created by
    OVN-Kubernetes, especially in routingViaHost=true gateway mode
  * **iptables-save** to list all iptables rules (given iptables is
    deprecated, we could skip this tool, though for now about 50% of
    ovn-kubernetes is still on IPT)
  * **conntrack** -L or -E on the host itself
  * **ip xfrm policy** and **ip xfrm state** when using IPsec
* TCPDUMP - external open source tools, can't be used for offline
  troubleshooting
  * **tcpdump** is used for packet capture and analysis
  * [**pwru**](https://github.com/cilium/pwru) is used to know the kernel drop reason
  * **libreswan** `ipsec status` and `ipsec trafficstatus` commands
  * **frr** router config and routes learnt by BGP

Ideally speaking, there are also metrics and events that, via alerts, go to
the dashboard, which is probably what most end users use to troubleshoot.
So when we get to phase 3 we would need to reconsider this troubleshooting
stack entirely to include other aspects, like the OVN-Kubernetes
troubleshooting dashboard that the observability team created or the
various packet drop tools the observability team already exposes. But for
the scope of this enhancement, for now, we will consider the above set
of tools as the MVP.

As we can see, that's a lot of tools! So remembering the syntax for each of
these tools, executing them one by one, gathering the information
at each layer, analysing it, and then moving to the next layer takes time
for a human. During a remote analysis of a bug report, the part that takes
the longest is always the RCA, combing through all the data - the same goes for
troubleshooting a cluster (which is slightly easier when we have access to the
cluster than analysing offline data). The fix is usually the easiest part (there are
exceptions).
+ +``` + OVN-Kubernetes Architecture & Troubleshooting Tools + (Per Node Components) + +┌────────────────────────────────────────────────────────────────────────────┐ +│ ovnkube-node pod │ +│ │ +│ ┌─────────────────┐ ┌─────────────────┐ kubectl exec/logs │ +│ │ ovnkube │◄──►│ NBDB │◄─────── ovn-nbctl show/list │ +│ │ controller │ │ (northbound) │ │ +│ └─────────────────┘ └─────────────────┘ │ +│ │ │ │ +│ │ ┌─────────────────┐ │ +│ │ │ northd │ │ +│ │ └─────────────────┘ │ +│ │ │ │ +│ │ ┌─────────────────┐ │ +│ └─────────────►│ SBDB │◄─────── ovn-sbctl show/list │ +│ │ (southbound) │ ovn-trace/detrace │ +│ └─────────────────┘ │ +│ │ │ +│ ┌─────────────────┐ │ +│ │ ovn-controller │◄─────── ovn-appctl ct-zone │ +│ └─────────────────┘ │ +│ │ │ +└───────────────────────────────────┼────────────────────────────────────────┘ + │ + ┌─────────────────┐ + │ OVS │◄─────── ovs-vsctl list + │ (database) │ ovs-appctl dpctl/ + └─────────────────┘ ovs-ofctl dump-flows + │ retis (packet drops) + ┌─────────────────┐ + │ OVS bridge │◄─────── ovs-appctl ofproto/trace + │ br-int/breth0 │ + └─────────────────┘ + │ + ┌─────────────────┐ + │ NIC │ + │ (physical) │ + └─────────────────┘ + │ + ┌──────────────┴──────────────┐ + │ Host Network │ + │ │ + │ ip route/addr/rule ◄───────┼─────── ip commands + │ nft ruleset ◄───────┼─────── nft list ruleset + │ iptables rules ◄───────┼─────── iptables-save + │ conntrack zones ◄───────┼─────── conntrack -L/-E + │ │ + │ Network interfaces ◄───────┼─────── tcpdump/pwru + └─────────────────────────────┘ + + Problem: Engineers must know WHERE to run WHICH tool on WHICH component + to troubleshoot issues across this distributed architecture +``` + +This enhancement aims to solve this pain point of reducing the time taken to +execute these tools and analyse these results using MCP Servers and LLMs. + +# User Stories + +**As an OVN-Kubernetes developer**, **I want to** troubleshoot my stack +without needing to know every tool's parameter fields by-heart or by spending +time looking it up each time I need to troubleshoot a feature **so that** I +can spend my time efficiently. I just want to tell in plain english what I +want and for the MCP server to execute those specific commands. + +**As an OVN-Kubernetes engineer**, **I want to** troubleshoot my stack +without needing to analyse each flow output of these tools when I need to +troubleshoot a feature **so that** I can spend my time efficiently. I just +want to tell in plain english what I want and for the LLM to help me analyze +the output of the commands executed by the MCP Server. I understand that I +will need to verify the reasoning thoroughly before accepting the RCA from AI. + +**As a new engineer joining the OVN-Kubernetes team**, **I want to** retrieve +specific information from different parts of the stack without having +knowledge of the topology or tooling of the stack. + +# Proposed Solution + +We build a Golang MCP Server (could be split into a set of MCP Servers in the +future) that exposes these tools in read only fashion and the LLM backend +that has the required context will analyse the results of the execution and +provide a response back to the prompter who has to verify it thoroughly. This +MCP Server code will be in a new repo in +[ovn-kubernetes org](https://github.com/ovn-kubernetes) called +**ovn-kubernetes-mcp**. + +## Example Workflow for an end-user + +1. An end-user can use any MCP Client to start a troubleshooting session via + prompting. 
The client connects with all the available servers (in our case + the OVN-Kubernetes MCP Server and maybe in the future all layered community + MCP Severs) and gathers their available tools and presents this information + to the LLM along with their schemas. Example: Using Cursor AI as your MCP + Client +2. LLM will be able use its intelligence and analyze the end-user query and + choose the appropriate tools. Example: Using Claude Sonnet4 as your LLM + model +3. MCP Client then receives the LLM's tool call and routes it to the + corresponding MCP Server which executes the tool and client then relays + the response back to the LLM +4. LLM again uses its intelligence to analyze the responses +5. LLM provides a RCA back to the end user + +The steps 2, 3 and 4 is repeated by the LLM and it intelligently does a step-by-step +layered troubleshooting exercise to find the root cause. + +We may also include predefined and tested prompt steps in the documentation +of our repo for helping end-users to give the LLM good context around +OVN-Kubernetes and OVN and OVS. For example, provide the latest OVN man +pages so that it has the current knowledge of OVN DB schemas and tools usage. +Some standard preparation prompt can be maintained in the mcp-server repo +as reference. + +``` +OVN-Kubernetes Troubleshooting MCP Architecture +=============================================== + +Engineer Query: "Pod A can't reach Pod B on different nodes, consistent connection drops" + +┌──────────────────────────────────────────────────────────────────────────────┐ +│ MCP SERVERS (Layer-Specific Tools) │ +└──────────────────────────────────────────────────────────────────────────────┘ + +┌──────────────────────┐ ┌──────────────────────┐ ┌──────────────────────┐ ┌──────────────────────┐ ┌──────────────────────┐ +│ Kubernetes/K8s │ │ OVN Layer │ │ OpenvSwitch │ │ Netfilter/ │ │ TCPDUMP/Debug │ +│ MCP Server │ │ MCP Server │ │ Layer MCP │ │ Kernel MCP │ │ MCP Server │ +│ │ │ │ │ Server │ │ Server │ │ │ +│ Tools: │ │ Tools: │ │ │ │ │ │ Tools: │ +│ • kubectl_get │ │ • ovn_nbctl_show │ │ Tools: │ │ Tools: │ │ • tcpdump_capture │ +│ • kubectl_describe │ │ • ovn_nbctl_list │ │ • ovs_ofctl_flows │ │ • ip_route_show │ │ • tcpdump_analyze │ +│ • kubectl_logs │ │ • ovn_sbctl_show │ │ • ovs_appctl_dpctl │ │ • ip_addr_show │ │ • pwru_trace │ +│ • kubectl_exec │ │ • ovn_sbctl_list │ │ • ovs_ofproto_trace │ │ • ip_rule_show │ │ │ +│ • kubectl_events │ │ • ovn_trace │ │ • ovs_appctl_conntr │ │ • nft_list_ruleset │ │ │ +│ • ovnkube_trace │ │ • ovn_detrace │ │ • ovs_vsctl_show │ │ • iptables_save │ │ │ +│ │ │ • ovn_controller_ct │ │ • retis_capture │ │ • conntrack_list │ │ │ +│ │ │ │ │ │ │ • conntrack_events │ │ │ +└──────────────────────┘ └──────────────────────┘ └──────────────────────┘ └──────────────────────┘ └──────────────────────┘ + + │ All tools aggregated + ▼ + +┌──────────────────────────────────────────────────────────────────────────────┐ +│ MCP CLIENT │ +│ (OVN-K8s Troubleshoot AI) │ +│ │ +│ Unified Tool Interface: │ +│ ┌──────────────────────────────────────────────────────────────────────────┐ │ +│ │ Layer 1 (K8s): kubectl_*, ovnkube_trace │ │ +│ │ Layer 2 (OVN): ovn_nbctl_*, ovn_sbctl_*, ovn_trace_* │ │ +│ │ Layer 3 (OVS): ovs_ofctl_*, ovs_appctl_*, ovs_vsctl_*, retis_* │ │ +│ │ Layer 4 (Kernel): ip_*, nft_*, iptables_*, conntrack_* │ │ +│ │ Layer 5 (Debug): tcpdump_*, pwru_* │ │ +│ └──────────────────────────────────────────────────────────────────────────┘ │ 
+└──────────────────────────────────────────────────────────────────────────────┘ + + │ Present unified interface + ▼ + +┌──────────────────────────────────────────────────────────────────────────────┐ +│ LLM (OVN-K8s Expert) │ +└──────────────────────────────────────────────────────────────────────────────┘ +``` + +Doing multiple MCP Servers has clear advantages like each layer owning their +toolset, independent development and maintenance, granular RBAC, reusability, +one server can't affect the other. Given we would need to align with different +layered communities later on for a fully supported set of servers from their +side which might take several releases, for the phase1 here, we plan to build +our own monolithic server that could then be split into multiple servers later +on. A single unified server is simpler, faster to iterate on. + +So our current implementation design looks like this: + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ MCP Client │ +│ "Debug pod connectivity issues" │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + MCP Protocol + │ + ▼ +┌──────────────────────────────────────────────────────────────────────────────┐ +│ OVN-Kubernetes MCP Server │ +│ │ +│ ┌─────────────────┐ ┌─────────────────┐ ┌──────────────────────────────┐ │ +│ │ Kubernetes │ │ Live Cluster │ │ Offline Bundle │ │ +│ │ Layer │ │ Execution │ │ Execution │ │ +│ │ │ │ │ │ │ │ +│ │ • kubectl get │ │ kubectl exec │ │ Offline artifacts parser │ │ +│ │ • kubectl desc │ │ │ │ tools. │ │ +│ │ • kubectl logs │ │ Direct API │ │ Example: xsos and omc │ │ +│ └─────────────────┘ └─────────────────┘ └──────────────────────────────┘ │ +│ │ +│ ┌──────────────────────────────────────────────────────────────────────┐ │ +│ │ Tool Categories │ │ +│ │ │ │ +│ │ ┌─────────────┐ ┌─────────────┐ ┌──────────────┐ ┌─────────────┐ │ │ +│ │ │ OVN Layer │ │ OVS Layer │ │Kernel Layer │ │External │ │ │ +│ │ │ │ │ │ │ │ │Tools │ │ │ +│ │ │ ovn-nbctl │ │ ovs-ofctl │ │ ip route │ │ tcpdump │ │ │ +│ │ │ ovn-sbctl │ │ ovs-appctl │ │ nft list │ │ pwru │ │ │ +│ │ │ ovn-trace │ │ ovs-vsctl │ │ conntrack │ │ retis │ │ │ +│ │ │ ovn-detrace │ │ ovs-dpctl │ │ iptables-save│ │ │ │ │ +│ │ │ ovn-appctl │ │ ovs-ofproto │ │ │ │ │ │ │ +│ │ └─────────────┘ └─────────────┘ └──────────────┘ └─────────────┘ │ │ +│ └──────────────────────────────────────────────────────────────────────┘ │ +│ │ +│ ┌──────────────────────────────────────────────────────────────────────┐ │ +│ │ Security & RBAC │ │ +│ │ │ │ +│ │ • ovn-troubleshooter ClusterRole (read-only) │ │ +│ │ • Command parameter validation │ │ +│ │ • Node-specific targeting required │ │ +│ │ • Write operations blocked │ │ +│ └──────────────────────────────────────────────────────────────────────┘ │ +└──────────────────────────────────────────────────────────────────────────────┘ + │ + ┌───────────┼───────────┐ + │ │ │ + ▼ ▼ ▼ + ┌────────┐ ┌────────┐ ┌────────┐ + │ Node1 │ │ Node2 │ │ NodeN │ + │ │ │ │ │ │ + │ovnkube-│ │ovnkube-│ │ovnkube-│ + │node pod│ │node pod│ │node pod│ + │ │ │ │ │ │ + │ ovn-nb │ │ ovn-nb │ │ ovn-nb │ + │ ovn-sb │ │ ovn-sb │ │ ovn-sb │ + │ ovs │ │ ovs │ │ ovs │ + └────────┘ └────────┘ └────────┘ + +Data Flow Examples: +├─ Live: kubectl exec ovnkube-node-xyz -c nb-ovsdb -- ovn-nbctl show +├─ Live: kubectl debug node/worker-1 -- ovs-ofctl dump-flows br-int +├─ Live: kubectl debug node/worker-1 -- ip route show table all +├─ Offline: Parse must-gather/ovn-kubernetes/ovn-northbound.db +├─ Offline: 
Parse sos-report/node-1/openvswitch/ovs-ofctl_dump-flows
└─ Offline: Parse sos-report/node-1/networking/ip_route
```

Note: Some of the layers, like Kubernetes and offline debugging, have
existing servers, such as [kubernetes-mcp-server](https://github.com/containers/kubernetes-mcp-server)
and [mustgather-mcp-server](https://github.com/shivprakashmuley/mustgather-mcp-server),
that can be re-used together with the ovn-kubernetes-mcp server for a holistic
end-user experience. However, kubernetes-mcp-server exposes `kubectl-exec`,
which has security implications (although it also has a read-only
mode where only read commands are exposed).

Note 2: Feeding container logs to the LLM will fill the context window
pretty fast. We need to investigate a method to ensure we feed only the
relevant, filtered logs.

## Implementation Details of the OVN-Kubernetes MCP Server

See the Alternatives section for other ideas that were discarded.

### Chosen Approach: Direct CLI Tool Exposure (Idea1)

The initial implementation takes a pragmatic approach by directly exposing the
existing CLI tools (`ovn-nbctl`, `ovn-sbctl`, `ovs-vsctl`, etc.) as MCP tools.
While this approach may seem less elegant than creating higher-level wrapper
abstractions to connect to the database (see discarded alternatives), it
offers the fastest path to value for OVN-Kubernetes engineers who are already
familiar with these tools but want to leverage LLM assistance for command
construction and output analysis. The CLI-based approach is also optimal for
exposing the complete troubleshooting toolkit across all layers of the stack.
Most other discarded ideas only addressed OVSDB access and reusability
concerns while failing to provide the holistic set of tools needed for
comprehensive root cause analysis, like running packet traces.

The MCP server acts as a secure execution bridge, translating natural language
troubleshooting requests into appropriate CLI commands.

**Advantages**:

* **Fastest Time to Value**: Leverages existing tools that engineers already
  know
* **Zero Deployment Overhead**: All required CLI binaries are already present
  in the pods and nodes within the cluster
* **Comprehensive Coverage**: Only approach that provides access to the
  complete troubleshooting toolkit across all stack layers
* **Security Controls**: Enables read-only access enforcement by exposing
  only the tools with read-only access (get/list)
* **No version compatibility issues** between the MCP server tools and the
  cluster's installed versions when running on live cluster environments

**Trade-offs**:

* **Limited Reusability**: Somewhat specific to OVN-Kubernetes deployment
  patterns and can't be reused in other layers like OVS

This approach was selected as the optimal balance between security,
functionality, and development effort.

## Security Model and RBAC Constraints

No matter how we approach this implementation, it is impossible to totally
secure the execution of networking troubleshooting tools at this stage. Most
networking tools require privileged access to function properly, creating
inherent security trade-offs. The goal is therefore to minimize blast radius
through layered controls rather than achieve perfect isolation.

**Kubernetes Layer Security:**

* `kubectl exec` and `kubectl debug node` operations require cluster-admin level
  privileges by design. Avoid exposing generic exec in the K8s layer.
  Prefer direct Kubernetes API reads (`get`, `list`, `logs`, `events`).
* Alternative approach using custom debug containers
  (`kubectl debug node --image=`) with volume mounts to database
  files reduces some attack surface but remains intrusive
* **Mitigation**: Expose only Kubernetes API read operations (`get`, `list`,
  `logs`, `events`); remove any generic exec tool in this layer.
* **Constraint**: MCP Server service account requires elevated privileges
  (including `pods/exec`) despite being conceptually a "troubleshooter" role

At first, for the Kubernetes layer, we thought of leveraging the open source
[kubernetes-mcp-server](https://github.com/containers/kubernetes-mcp-server),
so that whatever security posture it uses could be adopted. It has a
`read-only` mode and a mode where writes can be done via kubectl exec.
Later, after review feedback, this enhancement changed direction and
pivoted towards a more secure approach: adding tools that are wrappers on
top of `kubectl exec` without directly exposing it. So instead of relying
on `kubernetes-mcp-server`, our `ovn-kubernetes-mcp` will use
`kubectl exec` only as an implementation detail and not expose it directly.
The downside is that the get/list resource commands will be duplicated into
`ovn-kubernetes-mcp`, so we will also need to implement the Kubernetes-layer
tools we need ourselves.

**OVN/OVS Database Layer Security:**

* Unix socket-based database access prevents using SSL certificate-based
  authentication and authorization
* Database connections inherit the security context of the container executing
  the commands
* **Mitigation**: Command parameter validation to ensure only read-only
  database operations (`show`, `list`, `dump`) while blocking modification
  commands (`set`, `add`, `remove`)
* **Long-term Path**: Requires RBAC-enabled CLI execution even against the
  local unix socket.

**Host/Kernel Layer Security:**

* Kernel-level networking tools (`ip`, `nft`, `conntrack`) inherently require
  root system access
* Current tooling lacks granular RBAC capabilities - tools are typically
  all-or-nothing from a privilege perspective
* **External Tools Note**: Tools like `tcpdump -i any` can be highly intrusive
  as they capture all network traffic on the host, requiring careful
  consideration of privacy and performance impact when they are chosen for
  execution.
* **Short-term Mitigation**: Strict command allowlisting exposing only read
  operations (`ip route show`, `nft list ruleset`) while blocking modification
  commands (`ip route add`, `nft add rule`)
* **Long-term Path**: Requires RBAC-enabled wrapper tools from upstream
  layered community teams (for example netfilter, kernel networking)

## Distributed Execution Context

The MCP Server will only be supported in interconnect mode.

### **OVN-Kubernetes Architecture Challenge**

In the OVN-Kubernetes interconnect architecture, each node maintains its own
local instances of critical databases and services:

* **Northbound Database**: Local OVN northbound database per node
* **Southbound Database**: Local OVN southbound database per node
* **OpenVSwitch Database**: Node-specific OVS database and flow tables
* **Host Networking**: Node-specific routing tables, conntrack zones, and
  kernel state

This distributed architecture means that troubleshooting commands must be
executed on the specific node where the relevant data resides.
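Regardless of which node a command ultimately runs on, every request should
first pass the read-only validation described in the security model above. The
following is a minimal, hypothetical sketch of that allowlisting in Go; the
function, map, and exact allowlists are illustrative and not the final
`ovn-kubernetes-mcp` code, but the binaries and verbs mirror the read-only
operations listed in the mitigations.

```go
package mcptools

import (
	"fmt"
	"strings"
)

// allowedReadOnly maps each exposed binary to the argument prefixes the server
// is willing to execute. Anything that does not match a prefix is rejected
// before it ever reaches kubectl exec / kubectl debug. (Hypothetical sketch.)
var allowedReadOnly = map[string][][]string{
	"ovn-nbctl": {{"show"}, {"list"}, {"find"}},
	"ovn-sbctl": {{"show"}, {"list"}, {"dump-flows"}},
	"ovs-vsctl": {{"show"}, {"list"}, {"list-br"}},
	"nft":       {{"list", "ruleset"}},
	"ip":        {{"route", "show"}, {"addr", "show"}, {"rule", "show"}},
	"conntrack": {{"-L"}},
}

// validateReadOnly returns an error unless args start with one of the allowed
// prefixes for the binary, so mutating verbs such as "ovn-nbctl set" or
// "ip route add" are blocked by construction.
func validateReadOnly(binary string, args []string) error {
	prefixes, ok := allowedReadOnly[binary]
	if !ok {
		return fmt.Errorf("binary %q is not exposed by the MCP server", binary)
	}
	for _, prefix := range prefixes {
		if len(args) >= len(prefix) && matchesPrefix(args, prefix) {
			return nil
		}
	}
	return fmt.Errorf("%s %s is not an allowed read-only command", binary, strings.Join(args, " "))
}

// matchesPrefix reports whether args begins with every element of prefix.
func matchesPrefix(args, prefix []string) bool {
	for i := range prefix {
		if !strings.EqualFold(args[i], prefix[i]) {
			return false
		}
	}
	return true
}
```

With a check like this in front of the execution path, commands such as
`ovn-nbctl set` or `nft add rule` fail validation and are never executed,
which is the property the mitigations above aim for.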
+
### **Node-Targeted Command Execution**

**Node Selection Strategy**: All tools requiring node-specific data accept a
`node_name` parameter as a required argument. The MCP server uses this
parameter to:

1. **Pod Selection**: Locate the appropriate `ovnkube-node` pod running on the
   specified node using `pod.spec.nodeName` matching
2. **Container Targeting**: Route OVN database commands to the correct
   container within the ovnkube-node pod (e.g., `nb-ovsdb`, `sb-ovsdb`
   containers)
3. **Execution Context**: Execute host-level commands via
   `kubectl debug node/ --image <>` for direct host access

**LLM Responsibility**: The LLM must determine the appropriate target node(s)
based on the troubleshooting context:

* **Pod-specific issues**: Use the node where the problematic pod is scheduled
  (`kubectl get pod -o wide`)
* **Network flow analysis**: Target nodes along the packet path (source node,
  destination node, gateway nodes)
* **Cluster-wide analysis**: Potentially execute commands across multiple
  nodes for correlation

We need to account for testing this LLM responsibility: it is not something
we can guarantee, but something we are offloading to the LLM rather than
solving ourselves.

## Deployment Strategy

### **Flexible Deployment Modes**

The MCP server is designed for flexible deployment without requiring elaborate
cluster infrastructure. Multiple deployment modes support different use cases
and security requirements:

**CLI Tool Mode (Simplest)**:

* Run the MCP server binary directly on a machine with `kubectl` access
  to the target cluster
* Server uses existing cluster credentials and executes commands via standard
  CLI tools
* Suitable for both live cluster troubleshooting and offline analysis
* Offline, bundle-based troubleshooting analysis would still need the
  corresponding parser tools to be run locally where the debug artifact files
  are hosted
* No cluster deployment required - operates entirely through external API
  access

**Debug Container Mode**:

* Package the MCP server as a container image that the LLM can select for
  `kubectl debug node --image=` operations
* This custom debug image contains the MCP server binary along with all
  necessary troubleshooting tools
* Reduces blast radius compared to using default debug images with full host
  access
* The LLM chooses this image when it needs to execute commands requiring
  direct host access

**Future Considerations**:

* More elaborate deployment patterns (DaemonSet, Deployment) can be considered
  when we think of use cases beyond ovn-kubernetes developers

## Testing Strategy

**Unit Testing - MCP Server Tools**:

* Straightforward validation of individual tool execution and parameter
  handling. Use [mcp-inspector](https://github.com/modelcontextprotocol/inspector).
* Mock cluster responses to test command routing and error handling
* Verify security controls and command allowlisting functionality

**Integration Testing - The Complex Challenge**:

* **Real Failure Scenario Reproduction**: Design test scenarios based on past
  bugs and commonly occurring incidents
* **Chaos Engineering Integration**: Implement controlled failure injection to
  create realistic troubleshooting scenarios
* **LLM Reasoning Validation**: The most critical and challenging aspect -
  verifying that the LLM can produce meaningful root cause analysis from tool
  outputs

### **Scenario-Based Test Design**

**Historical Incident Replay**:

* Collect must-gather and sos-report bundles from past bugs
* Use these as offline test datasets to validate LLM troubleshooting accuracy
* Build a regression test suite ensuring consistent analysis quality over time

**Synthetic Failure Scenarios**:

Some examples include:

* Network policy and EgressIP misconfigurations
* Pod connectivity failures across nodes
* Gateway flow issues and routing problems
* OVN database inconsistencies, especially around EgressIPs

**LLM Capability Assessment**:

* Measure accuracy of root cause identification - depends on how much
  OVN-Kubernetes feature-specific context it has
* Evaluate quality of troubleshooting step recommendations
* Test correlation of multi-layer data analysis
* Validate handling of incomplete or missing data scenarios

### **Success Metrics**

* **Accuracy**: Percentage of correct root cause identifications in known
  failure scenarios
* **Completeness**: Coverage of troubleshooting steps recommended vs. manual
  expert analysis
* **Efficiency**: Time reduction compared to manual troubleshooting workflows
* **Safety**: Verification that only read-only operations are executed as
  intended

## Documentation Details

The OKEP for the MCP Server will be on
[https://ovn-kubernetes.io/](https://ovn-kubernetes.io/). All end-user
documentation will be in the new repo's docs folder.

* **Getting Started Guide**: MCP client setup and initial configuration
* **Troubleshooting Scenarios**: Common use cases and example natural language
  queries
* **Tool Reference**: Available tools and their capabilities across all stack
  layers
* **Security Model**: Warnings about security considerations
* **Deployment Options**: CLI mode vs. debug container mode setup instructions
* **Offline Analysis**: Must-gather and sos-report analysis workflows

## Alternative Implementation Ideas

### Idea0: Using Existing kubernetes-mcp-server with Generic Bash Execution

**Approach**: Use the existing kubernetes-mcp-server's `pods_exec` tool to run
arbitrary bash commands like `ovn-nbctl show` or `ip route` directly through
`kubectl exec` or `kubectl debug` sessions, without building any specialized
tooling.

**Rationale**: This approach would provide immediate access to all CLI tools
without any development effort, leveraging the LLM's knowledge of command
syntax to construct appropriate bash commands.

**Why Discarded**:

* **Security Risk**: Allowing arbitrary bash command execution creates
  significant security vulnerabilities. There's no protection against
  destructive commands like `ovn-nbctl set` operations that could modify live
  database state.
* **Lack of Access Control**: No way to enforce read-only operations or
  validate command parameters before execution.
* **Separation of Concerns**: The LLM should focus on analysis and
  troubleshooting logic, not on understanding the security implications of
  direct system access.
* **Blast Radius**: Any compromise or LLM hallucination could potentially
  execute dangerous commands on production systems.

The fundamental principle that "each layer knows best about how/what tools to
allow with proper read access" makes a controlled wrapper approach essential
rather than direct bash execution.

### Idea1: Chosen Approach: Direct CLI Tool Exposure

See the Proposed Solution section.

### Idea2: libovsdb-client Golang Wrapper

**Approach**: Build a Golang MCP server using `NewOVSDBClient` to directly
query OVSDB instances with proper RBAC controls implemented through OVSDB's
native access control mechanisms.

**Advantages**:

* **High Reusability**: Could be shared across OVN, OVS, and OVN-Kubernetes
  projects
* **Native RBAC**: Leverages OVSDB's built-in role-based access controls
* **Structured Output**: Returns structured data rather than CLI text parsing

**Why Discarded**:

* **Deployment Model**: Would require running as a DaemonSet on each node or
  shipping binaries to ovnkube-node pods
* **Scope Limitation**: Only addresses database access, missing OVN and OVS
  flow trace simulation and host networking tools

### Idea3: ovsdb-client Binary Wrapper

**Approach**: Create a wrapper around the existing `ovsdb-client` binary
(owned by the OVS team) to provide structured database access.

**Advantages**:

* **High Reusability**: Could be shared across OVN, OVS, and OVN-Kubernetes
  projects
* **Native RBAC**: Leverages OVSDB's built-in role-based access controls

**Why Discarded**:

* **Ownership**: We could argue this wrapper better belongs in the openvswitch
  community than in the OVN-Kubernetes org
* **Scope Limitation**: Only addresses database access, missing OVN and OVS
  flow trace simulation and host networking tools
* In the future, once we reach out to the OVN and OVS communities to see what
  they plan to do, we could revisit it.

### Idea4: Direct Database Access Wrapper

**Approach**: Build a wrapper for direct database read operations bypassing CLI
and client tools entirely.

**Advantages**:

* **High Reusability**: Could be shared across OVN, OVS, and OVN-Kubernetes
  projects

**Why Discarded**: Building from scratch when there's no real need to -
existing CLI tools already provide all necessary functionality with proven
reliability.

# Known Risks and Limitations

* AI! We can trust it only as far as we can throw it.
  * Quality of this troubleshooter depends on the LLM's intelligence
  * Quality of the MCP Server itself is however in our own hands and can be
    enhanced based on user experience
* Security! We know that we cannot fully eliminate the risk
* Performance/Scalability: MCPs are a relatively new concept. So aspects like
  how many tools we can expose per server and up to what point it scales,
  etc., are unknowns. We will need to try and test it in our PoCs as we
  develop this.
  * By sending bulky logs and debug details we also risk running out of
    context window. We need to ensure we filter the logs down to what is
    relevant.
  * Token consumption and context window bloating.
  * Other potential problems we need to rule out during testing:
    * Poor tool selection: LLMs struggle to choose the right tool from too many options
    * Parameter hallucination: Agents invoke tools with incorrect or fabricated parameters
    * Misinterpretation: Responses from tools are more likely to be misunderstood
    * Attention spreading: The model's attention gets distributed thinly across many options
diff --git a/docs/okeps/okep-5552-dynamic-udn-node-allocation.md b/docs/okeps/okep-5552-dynamic-udn-node-allocation.md
new file mode 100644
index 0000000000..1a68b9efb6
--- /dev/null
+++ b/docs/okeps/okep-5552-dynamic-udn-node-allocation.md
@@ -0,0 +1,205 @@
+# OKEP-5552: Dynamic UDN Node Allocation

* Issue: [#5552](https://github.com/ovn-org/ovn-kubernetes/issues/5552)

## Problem Statement

When scaling UDNs, the control-plane cost of rendering a topology is high. This is the core limiting factor to
being able to scale to 1000s of UDNs. While there are plans to also improve network controller performance with UDNs,
there are still valuable savings to be had by not rendering UDNs on nodes where they are not needed.

An example use case where this makes sense is when a Kubernetes cluster has its node resources segmented per tenant. In
this case, it only makes sense to run the tenant network (UDN) on the nodes where a tenant is allowed to run pods. This
allows for horizontal scaling to a much higher number of overall UDNs running in a cluster.

## Goals

 * To dynamically allow the network to only be rendered on specific nodes.
 * To increase the overall scalability of the number of UDNs in a Kubernetes cluster with this solution.
 * To increase the efficiency of ovnkube operations on nodes where a UDN exists, but is not needed.

## Non-Goals

 * To fully solve control plane performance issues with UDNs. There will be several other fixes done to address that
   outside of this enhancement.
 * To provide any type of network security guarantee about exposing UDNs to a limited subset of nodes.

## Future Goals

 * Potentially enabling this feature on a per-UDN basis, rather than globally.

## Introduction

The purpose of this feature is to add a configuration knob that users can turn on so that UDNs are only rendered on nodes
where pods exist on that UDN. This feature will allow for higher overall UDN scale and less per-node control plane resource usage
under conditions where clusters do not have pods on every node with connections to all UDNs. For example, with
1000 UDNs and 500 nodes, if a particular node only has pods connected to, say, 200 of those UDNs, then that node is only
responsible for rendering 200 UDNs instead of 1000 UDNs as it is today.

This can provide significant control plane savings, but comes at a cost. Using the previous example, if a pod is now
launched in UDN 201, the node will have to render UDN 201 before the pod can be wired. In other words, this introduces
a one-time larger pod latency cost for the first pod wired to the UDN. Additionally, there are more tradeoffs with other
feature limitations outlined later in this document.

## User-Stories/Use-Cases

Story 1: Segment groups of nodes per tenant

As a cluster admin, I plan to dedicate groups of nodes to either a single tenant or a small group of tenants. I plan
to create a CUDN per tenant, which means my network will only really need to exist on this group of nodes. I would
like to be able to limit this network to only be rendered on that subset of nodes.
This way I will be able to have less resource overhead from OVN-Kubernetes on each node,
and be able to scale to a higher number of UDNs in my cluster.

## Proposed Solution

The proposed solution is to add a configuration knob to OVN-Kubernetes, "--dynamic-udn-allocation", which will enable
this feature. Once enabled, NADs derived from CUDNs and UDNs will only be rendered on nodes where there is a pod
scheduled in that respective network. Additionally, if the node is scheduled as an Egress IP node for a UDN, this node
will also render the UDN.

When the last pod on the network is deleted from a node, OVNK will not immediately tear down the UDN.
Instead, OVNK will rely on a dead timer to expire to conclude that this UDN is no longer in use and
may be removed. This timer will also be configurable in OVN-Kubernetes as "--udn-deletion-grace-period".

### API Details

There will be no API changes. There will be new status conditions introduced in the section below.

### Implementation Details

In OVN-Kubernetes we have three main controllers that handle rendering of networking features for UDNs. They exist as:
 - Cluster Manager - runs on the control plane, handles cluster-wide allocation and rendering of CUDNs/UDNs
 - Controller Manager - runs on a per-zone basis, handles configuring OVN for all networking features
 - Node Controller Manager - runs on a per-node basis, handles configuring node-specific things like nftables, VRFs, etc.

With this change, Cluster Manager will be largely untouched, while Controller Manager and Node Controller Manager will be
modified in a few places to filter out rendering UDNs when no pod for them exists on the node.

#### Internal Controller Details

In OVN-Kubernetes we have many controllers that handle features for different networks, encompassed under three
controller manager containers. The breakdown of how these will be modified is outlined below:

* Cluster Manager
  * UDN Controller — No change
  * Route Advertisements Controller — No change
  * Egress Service Cluster — Doesn't support UDN
  * Endpoint Mirror Controller — No change
  * EgressIP Controller — No change
  * Unidling Controller — No change
  * DNS Resolver — No change
  * Network Cluster Controller — Modified to report status and exclude nodes not serving the UDN
* Controller Manager (ovnkube-controller)
  * Default Network — No change
  * NAD Controller — Ignore NADs for UDNs that are not active on this node (no pods for the UDN and not an EIP node)
* Node Controller Manager
  * Default Network — No change
  * NAD Controller — Ignore NADs for UDNs that are not active on this node (no pods for the UDN and not an EIP node)

The resulting NAD Controller change will filter out NADs that do not apply to this node, stopping NAD keys from being
enqueued to the Controller Manager/Node Controller Manager's Network Manager. Those Controller Managers will not need
to create or run any sub-controllers for networks that are not present on the node. To do this cleanly, NAD Controller will be
modified to hold a filterFunc field, which the respective controller manager can set in order to filter out NADs. For
Cluster Manager, this function will not apply, but for Controller Manager and Node Controller Manager it will be a function
that filters based on whether the UDN is serving pods on this node.
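As a rough illustration of this hook, the sketch below shows what the
filterFunc plumbing could look like. The type names, the `NodeHasNAD` query,
and the import path for the NAD types (the commonly vendored
network-attachment-definition-client package) are assumptions for the sake of
the example, not the final implementation.

```go
package nadcontroller

import (
	nadv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1"
)

// FilterFunc decides whether a NAD should be rendered by this manager.
// A nil filter (the Cluster Manager case) means every NAD is rendered.
type FilterFunc func(nad *nadv1.NetworkAttachmentDefinition) bool

// NADTracker is the query surface the Pod/EgressIP trackers would expose
// (see the next section); the real interface may differ.
type NADTracker interface {
	// NodeHasNAD reports whether the given node currently has a pod on the
	// NAD's network or is acting as an EgressIP node for it.
	NodeHasNAD(nodeName, nadKey string) bool
}

// NewNodeLocalFilter is what Controller Manager and Node Controller Manager
// would install on the NAD controller so inactive UDNs are skipped locally.
func NewNodeLocalFilter(tracker NADTracker, nodeName string) FilterFunc {
	return func(nad *nadv1.NetworkAttachmentDefinition) bool {
		return tracker.NodeHasNAD(nodeName, nad.Namespace+"/"+nad.Name)
	}
}

// shouldEnqueue shows where the filter would be consulted before a NAD key is
// handed to Network Manager: filtered-out NADs never produce a sub-controller.
func shouldEnqueue(filter FilterFunc, nad *nadv1.NetworkAttachmentDefinition) bool {
	return filter == nil || filter(nad)
}
```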
+ +#### New Pod/EgressIP Tracker Controller + +In order to know whether the Managers should filter out a UDN, a pod controller and egress IP controller will be used +in the Managers to track this information in memory. The pod controller will be a new level driven controller for +each manager. For Egress IP, another new controller will be introduced that watches EgressIPs, Namespaces, and NADs in +order to track which NAD maps to a node serving Egress IP. + +When Managers are created, they will start these Pod/EgressIP Tracker Controllers, and set a filterFunc on NAD Controller. +The filterFunc will query the aforementioned controllers to determine if the NAD being synced matches the local node. If +not, then NADController will not create the UDN controller for that network. + +Additionally, the Pod/EgressIP Tracker Controllers will expose a callback function, called "onNetworkRefChange". When +the first pod is detected as coming up on a node + NAD combination, or the node activates as an Egress IP node for the +first time, onNetworkRefChange will be triggered, which allows a callback mechanism to be leveraged for events. The +Controller Manager and Node Controller Manager will leverage this callback, so that they can trigger NAD Controller to +reconcile the NAD for these events. This is important as it provides a way to signal that NADController should remove +a UDN controller if it is no longer active, or alternatively, force the NAD Controller to reconcile a UDN Controller if for example, +a new remote node has activated. + +#### Other Controller Changes + +The Layer3 network controller will need to filter out nodes where the UDN is not rendered. Upon receiving events, +they will query a Manager function called NodeHasNAD. Managers will export a Tracker interface, that only contains this +method for UDN Controllers to query. The implementation of NodeHasNAD will rely on the Manager querying their pod and +egress IP trackers. + +Upon UDN activation of a remote node, these controllers will need to receive events in order to reconcile the new remote node. +To do this, the corresponding tracker will trigger its callback, "onNetworkRefChange". That will trigger the Manager +to ask NAD Controller to reconcile the UDN controller belonging to this NAD. Once that Layer 3 UDN controller reconciles, +it will walk nodes and determine what needs to be added or removed. It will take the applicable nodes, set their +syncZoneICFailed status, then immediately queue the objects to the retry framework with no backoff. This will allow +the Zone IC (ZIC) controller to properly configure the transit switch with the remote peers, or tear it down, if necessary. + +#### Status Condition and Metric Changes + +A new status condition will be added to CUDN/UDN that will indicate how many nodes are selected for a network: +```yaml +status: + conditions: + - type: NodesSelected + status: "True" + reason: DynamicAllocation + message: "5 nodes rendered with network" + lastTransitionTime: 2025-09-22T20:10:00Z +``` + +If the status is "False", then no nodes are currently allocated for the network - no pods or egress IPs assigned. + +Cluster Manager will leverage instances of EgressIP and Pod Trackers in order to use that data for updating this status. +The nodes serving a network are defined as a node with at least one OVN networked pod or having an Egress IP assigned to +it on a NAD that maps to a UDN or CUDN. 
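To make the status handling above concrete, a hedged sketch of how Cluster
Manager could derive the `NodesSelected` condition from the tracker data is
shown below. The helper name and its wiring are hypothetical; only the
condition fields follow the YAML example above.

```go
package status

import (
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// nodesSelectedCondition builds the NodesSelected condition for a CUDN/UDN
// from the number of nodes the Pod/EgressIP trackers report as serving the
// network. Real code would preserve LastTransitionTime when the status value
// does not change; this sketch always stamps the current time.
func nodesSelectedCondition(activeNodes int) metav1.Condition {
	cond := metav1.Condition{
		Type:               "NodesSelected",
		Status:             metav1.ConditionFalse,
		Reason:             "DynamicAllocation",
		Message:            "no nodes rendered with network",
		LastTransitionTime: metav1.Now(),
	}
	if activeNodes > 0 {
		cond.Status = metav1.ConditionTrue
		cond.Message = fmt.Sprintf("%d nodes rendered with network", activeNodes)
	}
	return cond
}
```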
+
Additionally, events will be posted to the corresponding UDN/CUDN when nodes become active or inactive for
the network. This was chosen instead of doing per-node status entries, as that can lead to scale issues. Using events provides
the audit trail, without those scale implications. The one drawback of this approach pertains to UDN deactivation. There
is a "udn-deletion-grace-period" timer used to delay deactivation of a UDN on a node. This is to prevent churn if a pod
is deleted, then almost immediately re-added. Without storing the timestamp in the API, we are relying internally on in-memory
data. While this is fine for normal operation, if the OVN-Kube pod restarts, we lose that context. However, this should
be fine as when we restart we have to walk and start all network controllers anyway, so we are not really creating a lot of
extra work for OVN-Kube here.

A metric will also be exposed which allows the user to track over time how many nodes were active for a particular
network.

### Testing Details

* Unit Tests will be added to ensure the behavior works as expected, including checking that
OVN switches/routers are not created when there is no pod/egress IP active on the node, etc.
* E2E Tests will be added to create a CUDN/UDN with the feature enabled and ensure pod traffic works correctly between nodes.
* Benchmark/Scale testing will be done to show the resource savings of 1000s of nodes with 1000s of UDNs.

### Documentation Details

* User-Defined Network feature documentation will be updated with a user guide for this new feature.

## Risks, Known Limitations and Mitigations

Risks:
 * Additional first-pod cold start latency per UDN/node. Could impact pod readiness SLOs.
 * Burst reconcile load on large rollouts of pods on inactive nodes.

Limitations:
 * No OVN central support.
 * NodePort/ExternalIP services with external traffic policy mode "cluster" will not work when sending traffic to inactive nodes.
 * MetalLB must be configured on nodes where the UDN is rendered. This can be achieved by scheduling a daemonset for the designated nodes on the UDN.

## OVN Kubernetes Version Skew

Targeted for release 1.2.

## Alternatives

Specifying a NodeSelector in the CUDN/UDN CRD in order to determine where a network should be rendered. This was the
initial idea of this enhancement, but was evaluated as less desirable than dynamic allocation. The dynamic allocation
provides more flexibility without a user/admin needing to intervene and update a CRD.

## References

None
diff --git a/go-controller/Makefile b/go-controller/Makefile
index 27ebf94c8b..8224451f9e 100644
--- a/go-controller/Makefile
+++ b/go-controller/Makefile
@@ -102,7 +102,8 @@ ifeq ($(CONTAINER_RUNNABLE), 0)
 	@GOPATH=${GOPATH} ./hack/lint.sh $(CONTAINER_RUNTIME) fix || { echo "ERROR: lint fix failed! There is a bug that changes file ownership to root \
 	when this happens. To fix it, simply run 'chown -R : *' from the repo root."; exit 1; }
 else
-	echo "linter can only be run within a container since it needs a specific golangci-lint version"; exit 1
+	echo "no container runtime. 
attempting to run natively";
+	@GOPATH=${GOPATH} ./hack/lint.sh run-natively || { echo "running lint locally failed!"; exit 1; }
 endif
 
 gofmt:
diff --git a/go-controller/hack/lint.sh b/go-controller/hack/lint.sh
index 92863e6b5c..09b47c3911 100755
--- a/go-controller/hack/lint.sh
+++ b/go-controller/hack/lint.sh
@@ -1,18 +1,28 @@
 #!/usr/bin/env bash
 VERSION=v1.64.8
-extra_flags=""
+: "${GOLANGCI_LINT_VERSION:=$VERSION}"
+extra_flags=(--verbose --print-resources-usage --modules-download-mode=vendor --timeout=15m0s)
 if [ "$#" -ne 1 ]; then
     if [ "$#" -eq 2 ] && [ "$2" == "fix" ]; then
-        extra_flags="--fix"
+        extra_flags+=(--fix)
     else
-        echo "Expected command line argument - container runtime (docker/podman) got $# arguments: $@"
+        echo "Expected command line argument - container runtime (docker/podman) or 'run-natively'; got $# arguments: $*"
         exit 1
     fi
 fi
-$1 run --security-opt label=disable --rm \
-    -v ${HOME}/.cache/golangci-lint:/cache -e GOLANGCI_LINT_CACHE=/cache \
-    -v $(pwd):/app -w /app -e GO111MODULE=on docker.io/golangci/golangci-lint:${VERSION} \
-    golangci-lint run --verbose --print-resources-usage \
-    --modules-download-mode=vendor --timeout=15m0s ${extra_flags} && \
-    echo "lint OK!"
\ No newline at end of file
+if [ "$1" = "run-natively" ]; then
+    mkdir -p /tmp/local/bin/
+    curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b /tmp/local/bin/ "${GOLANGCI_LINT_VERSION}"
+    mkdir -p /tmp/golangci-cache
+    export GOLANGCI_LINT_CACHE=/tmp/golangci-cache
+    /tmp/local/bin/golangci-lint run "${extra_flags[@]}" && \
+    echo "lint OK!"
+else
+    $1 run --security-opt label=disable --rm \
+        -v "${HOME}"/.cache/golangci-lint:/cache -e GOLANGCI_LINT_CACHE=/cache \
+        -v "$(pwd)":/app -w /app -e GO111MODULE=on docker.io/golangci/golangci-lint:"${VERSION}" \
+        golangci-lint run "${extra_flags[@]}" && \
+        echo "lint OK!"
+fi
+
diff --git a/go-controller/pkg/clustermanager/egressip_controller.go b/go-controller/pkg/clustermanager/egressip_controller.go
index 4cbd00d18f..70ec7a58e2 100644
--- a/go-controller/pkg/clustermanager/egressip_controller.go
+++ b/go-controller/pkg/clustermanager/egressip_controller.go
@@ -129,6 +129,15 @@ func (eIPC *egressIPClusterController) getAllocationTotalCount() float64 {
 	return float64(count)
 }
 
+func (e *egressNode) hasAllocatedEgressIP(name string, eip string) bool {
+	for ip, egressIPName := range e.allocations {
+		if egressIPName == name && ip == eip {
+			return true
+		}
+	}
+	return false
+}
+
 // nodeAllocator contains all the information required to manage EgressIP assignment to egress node. This includes assignment
 // of EgressIP IPs to nodes and ensuring the egress nodes are reachable. For cloud nodes, it also tracks limits for
 // IP assignment to each node. 
@@ -865,6 +874,7 @@ func (eIPC *egressIPClusterController) addAllocatorEgressIPAssignments(name stri defer eIPC.nodeAllocator.Unlock() for _, status := range statusAssignments { if eNode, exists := eIPC.nodeAllocator.cache[status.Node]; exists { + klog.V(5).Infof("Setting egress IP node allocation - node: %s, EIP name: %s, IP: %s", eNode.name, name, status.EgressIP) eNode.allocations[status.EgressIP] = name } } @@ -1423,6 +1433,10 @@ func (eIPC *egressIPClusterController) validateEgressIPStatus(name string, items klog.Errorf("Allocator error: EgressIP: %s claims multiple egress IPs on same node: %s, will attempt rebalancing", name, eIPStatus.Node) validAssignment = false } + if !eNode.hasAllocatedEgressIP(name, eIPStatus.EgressIP) { + klog.Errorf("Allocator error: EgressIP: %s has mistmach with status vs cache for node: %s with IP: %s", name, eIPStatus.Node, eIPStatus.EgressIP) + validAssignment = false + } if !eNode.isEgressAssignable { klog.Errorf("Allocator error: EgressIP: %s assigned to node: %s which does not have egress label, will attempt rebalancing", name, eIPStatus.Node) validAssignment = false diff --git a/go-controller/pkg/clustermanager/egressip_controller_test.go b/go-controller/pkg/clustermanager/egressip_controller_test.go index 7f47c2d25d..ed64e59740 100644 --- a/go-controller/pkg/clustermanager/egressip_controller_test.go +++ b/go-controller/pkg/clustermanager/egressip_controller_test.go @@ -3,8 +3,12 @@ package clustermanager import ( "context" "fmt" + "maps" "net" + "reflect" + "slices" "strconv" + "strings" "sync" "time" @@ -296,6 +300,62 @@ var _ = ginkgo.Describe("OVN cluster-manager EgressIP Operations", func() { return nodes[0] } + egressIPsMatch := func(expectedEgressIPs []egressipv1.EgressIP) func() bool { + return func() (result bool) { + egressIPList, err := fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().List(context.TODO(), metav1.ListOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gotEgressIPs := egressIPList.Items + + defer func() { + if !result { + ginkgo.GinkgoWriter.Printf("Mismatch in egressIP lists, got: %+v, expected: %+v\n", gotEgressIPs, expectedEgressIPs) + } + }() + + if len(expectedEgressIPs) != len(gotEgressIPs) { + return false + } + + numEquals := 0 + for _, expectedEgressIP := range expectedEgressIPs { + for _, gotEgressIP := range gotEgressIPs { + if expectedEgressIP.Name != gotEgressIP.Name { + continue + } + + specExpectedEIP := slices.Clone(expectedEgressIP.Spec.EgressIPs) + specGotEgressIP := slices.Clone(gotEgressIP.Spec.EgressIPs) + slices.Sort(specExpectedEIP) + slices.Sort(specGotEgressIP) + if !reflect.DeepEqual(specExpectedEIP, specGotEgressIP) { + return false + } + + statusExpectedEgressIP := slices.Clone(expectedEgressIP.Status.Items) + statusGotEgressIP := slices.Clone(gotEgressIP.Status.Items) + sortFunc := func(a, b egressipv1.EgressIPStatusItem) int { + return strings.Compare(a.EgressIP, b.EgressIP) + } + slices.SortFunc(statusExpectedEgressIP, sortFunc) + slices.SortFunc(statusGotEgressIP, sortFunc) + if !reflect.DeepEqual(statusExpectedEgressIP, statusGotEgressIP) { + return false + } + numEquals++ + } + } + return len(expectedEgressIPs) == numEquals + } + } + + readAllocations := func(nodeName string) map[string]string { + fakeClusterManagerOVN.eIPC.nodeAllocator.Lock() + defer fakeClusterManagerOVN.eIPC.nodeAllocator.Unlock() + node, ok := fakeClusterManagerOVN.eIPC.nodeAllocator.cache[nodeName] + gomega.Expect(ok).To(gomega.BeTrue(), fmt.Sprintf("node %s missing from allocator cache", 
nodeName)) + return maps.Clone(node.allocations) + } + ginkgo.BeforeEach(func() { // Restore global default values before each testcase gomega.Expect(config.PrepareTestConfig()).To(gomega.Succeed()) @@ -3264,15 +3324,21 @@ var _ = ginkgo.Describe("OVN cluster-manager EgressIP Operations", func() { ginkgo.Context("syncEgressIP for dual-stack", func() { - ginkgo.It("should not update valid assignments", func() { - // FIXME(mk): this test doesn't ensure that the EIP status does not get patched during the test run - // and therefore is an invalid test to test that the status is not patched + // This test validates that if the allocator cache contains valid entries that match + // the egress IP status items, no reassignment shall happen. + // In order to do so, it does 2 comparisons: + // a) take egressIP that's passed to the cluster manager and compare it to the list of EgressIPs after running + // WatchEgressIP. + // b) take egressNode.allocations before running WatchEgressIP and make sure that they haven't changed. + ginkgo.It("should not reallocate if cache matches EgressIP status", func() { app.Action = func(*cli.Context) error { config.IPv6Mode = true config.IPv4Mode = true egressIPv4 := "192.168.126.101" egressIPv6 := "0:0:0:0:0:feff:c0a8:8e0d" - node1IPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" + node1IPv6 := "0:0:0:0:0:feff:c0a8:8e0b/64" + node2IPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" + node1IPv4 := "192.168.126.50/16" node2IPv4 := "192.168.126.51/16" node1 := corev1.Node{ @@ -3318,8 +3384,15 @@ var _ = ginkgo.Describe("OVN cluster-manager EgressIP Operations", func() { }, } - egressNode1 := setupNode(node1Name, []string{node1IPv6}, map[string]string{}) - egressNode2 := setupNode(node2Name, []string{node2IPv4}, map[string]string{"192.168.126.102": "bogus3"}) + egressNode1 := setupNode(node1Name, []string{node1IPv4, node1IPv6}, map[string]string{ + net.ParseIP(egressIPv6).String(): egressIPName, // Cache matches eIP Status. + }) + egressNode2 := setupNode(node2Name, []string{node2IPv4, node2IPv6}, map[string]string{ + "192.168.126.102": "bogus3", + egressIPv4: egressIPName, // Cache matches eIP Status. 
+ }) + egressNode1OriginalAllocations := maps.Clone(egressNode1.allocations) + egressNode2OriginalAllocations := maps.Clone(egressNode2.allocations) eIP := egressipv1.EgressIP{ ObjectMeta: newEgressIPMeta(egressIPName), @@ -3350,11 +3423,11 @@ var _ = ginkgo.Describe("OVN cluster-manager EgressIP Operations", func() { _, err := fakeClusterManagerOVN.eIPC.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(2)) - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes).To(gomega.ConsistOf(eIP.Status.Items[0].Node, eIP.Status.Items[1].Node)) - gomega.Expect(egressIPs).To(gomega.ConsistOf(eIP.Status.Items[0].EgressIP, eIP.Status.Items[1].EgressIP)) + gomega.Eventually(egressIPsMatch([]egressipv1.EgressIP{eIP})).Should(gomega.BeTrue()) + gomega.Eventually(readAllocations).WithArguments(egressNode1.name).Should( + gomega.Equal(egressNode1OriginalAllocations)) + gomega.Eventually(readAllocations).WithArguments(egressNode2.name).Should( + gomega.Equal(egressNode2OriginalAllocations)) return nil } @@ -3453,6 +3526,50 @@ var _ = ginkgo.Describe("OVN cluster-manager EgressIP Operations", func() { egressIPs, nodes := getEgressIPStatus(egressIPName) gomega.Expect(nodes).To(gomega.ConsistOf(egressNode1.name, egressNode2.name)) gomega.Expect(egressIPs).To(gomega.ConsistOf(eIP.Status.Items[0].EgressIP, eIP.Status.Items[1].EgressIP)) + // give some time for event handler to be added and finish processing initial updates + time.Sleep(3 * time.Second) + realEIP, err := fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), egressIPName, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ginkgo.By("Simulate lagging informer and send an event with outdated status") + eIP.Annotations = make(map[string]string) + for k, v := range realEIP.Annotations { + eIP.Annotations[k] = v + } + eIP.Status = egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{ + { + EgressIP: egressIP2, + Node: egressNode1.name, + }, + { + EgressIP: egressIP1, + Node: egressNode1.name, + }, + }, + } + _, err = fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().Update(context.TODO(), &eIP, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // give sometime for events to be processed + time.Sleep(3 * time.Second) + ginkgo.By("Simulate lagging informer and send an event with real updated status") + _, err = fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().Update(context.TODO(), realEIP, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // give sometime for events to be processed + time.Sleep(3 * time.Second) + gomega.Eventually(func() error { + defer ginkgo.GinkgoRecover() + tmp, err := fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), egressIPName, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + var egressIPs, nodes []string + for _, status := range tmp.Status.Items { + egressIPs = append(egressIPs, status.EgressIP) + nodes = append(nodes, status.Node) + } + gomega.Expect(nodes).To(gomega.ConsistOf(egressNode1.name, egressNode2.name)) + gomega.Expect(egressIPs).To(gomega.ConsistOf(eIP.Status.Items[0].EgressIP, eIP.Status.Items[1].EgressIP)) + return nil + }).Should(gomega.Succeed()) + return nil } @@ -3956,7 +4073,13 @@ var _ = ginkgo.Describe("OVN cluster-manager EgressIP Operations", func() { 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - ginkgo.It("should not update valid assignment", func() { + // This test validates that if the allocator cache contains valid entries that match + // the egress IP status items, no reassignment shall happen. + // In order to do so, it does 2 comparisons: + // a) take egressIP that's passed to the cluster manager and compare it to the list of EgressIPs after running + // WatchEgressIP. + // b) take egressNode.allocations before running WatchEgressIP and make sure that they haven't changed. + ginkgo.It("should not reallocate if cache matches EgressIP status", func() { app.Action = func(*cli.Context) error { egressIP1 := "192.168.126.101" node1IPv4 := "192.168.126.12/24" @@ -4005,8 +4128,15 @@ var _ = ginkgo.Describe("OVN cluster-manager EgressIP Operations", func() { }, } - egressNode1 := setupNode(node1Name, []string{"192.168.126.12/24"}, map[string]string{"192.168.126.111": "bogus2"}) - egressNode2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus3"}) + egressNode1 := setupNode(node1Name, []string{"192.168.126.12/24"}, map[string]string{ + "192.168.126.111": "bogus2", + egressIP1: egressIPName, // Cache matches eIP Status. + }) + egressNode2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{ + "192.168.126.68": "bogus3", + }) + egressNode1OriginalAllocations := maps.Clone(egressNode1.allocations) + egressNode2OriginalAllocations := maps.Clone(egressNode2.allocations) eIP := egressipv1.EgressIP{ ObjectMeta: newEgressIPMeta(egressIPName), @@ -4033,10 +4163,12 @@ var _ = ginkgo.Describe("OVN cluster-manager EgressIP Operations", func() { _, err := fakeClusterManagerOVN.eIPC.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(egressNode1.name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP1)) + + gomega.Eventually(egressIPsMatch([]egressipv1.EgressIP{eIP})).Should(gomega.BeTrue()) + gomega.Eventually(readAllocations).WithArguments(egressNode1.name).Should( + gomega.Equal(egressNode1OriginalAllocations)) + gomega.Eventually(readAllocations).WithArguments(egressNode2.name).Should( + gomega.Equal(egressNode2OriginalAllocations)) return nil } diff --git a/go-controller/pkg/controllermanager/node_controller_manager.go b/go-controller/pkg/controllermanager/node_controller_manager.go index e183e3b3fc..34cfda589e 100644 --- a/go-controller/pkg/controllermanager/node_controller_manager.go +++ b/go-controller/pkg/controllermanager/node_controller_manager.go @@ -75,14 +75,18 @@ func (ncm *NodeControllerManager) CleanupStaleNetworks(validNetworks ...util.Net if !util.IsNetworkSegmentationSupportEnabled() { return nil } - validVRFDevices := make(sets.Set[string]) - for _, network := range validNetworks { - if !network.IsPrimaryNetwork() { - continue + // in DPU mode, vrfManager would be nil + if ncm.vrfManager != nil { + validVRFDevices := make(sets.Set[string]) + for _, network := range validNetworks { + if !network.IsPrimaryNetwork() { + continue + } + validVRFDevices.Insert(util.GetNetworkVRFName(network)) } - validVRFDevices.Insert(util.GetNetworkVRFName(network)) + return ncm.vrfManager.Repair(validVRFDevices) } - return ncm.vrfManager.Repair(validVRFDevices) + return nil } // newCommonNetworkControllerInfo creates and returns the base node network controller info @@ -126,7 +130,7 @@ 
func NewNodeControllerManager(ovnClient *util.OVNClientset, wf factory.NodeWatch return nil, err } } - if util.IsNetworkSegmentationSupportEnabled() { + if util.IsNetworkSegmentationSupportEnabled() && config.OvnKubeNode.Mode != ovntypes.NodeModeDPU { ncm.vrfManager = vrfmanager.NewController(ncm.routeManager) ncm.ruleManager = iprulemanager.NewController(config.IPv4Mode, config.IPv6Mode) } diff --git a/go-controller/pkg/crd/userdefinednetwork/v1/shared.go b/go-controller/pkg/crd/userdefinednetwork/v1/shared.go index 9787415ddc..ac1a4c95ca 100644 --- a/go-controller/pkg/crd/userdefinednetwork/v1/shared.go +++ b/go-controller/pkg/crd/userdefinednetwork/v1/shared.go @@ -102,6 +102,8 @@ type Layer3Subnet struct { // +kubebuilder:validation:XValidation:rule="!has(self.reservedSubnets) || self.reservedSubnets.all(e, self.subnets.exists(s, cidr(s).containsCIDR(cidr(e))))",message="reservedSubnets must be subnetworks of the networks specified in the subnets field",fieldPath=".reservedSubnets" // +kubebuilder:validation:XValidation:rule="!has(self.infrastructureSubnets) || self.infrastructureSubnets.all(e, self.subnets.exists(s, cidr(s).containsCIDR(cidr(e))))",message="infrastructureSubnets must be subnetworks of the networks specified in the subnets field",fieldPath=".infrastructureSubnets" // +kubebuilder:validation:XValidation:rule="!has(self.infrastructureSubnets) || !has(self.reservedSubnets) || self.infrastructureSubnets.all(infra, !self.reservedSubnets.exists(reserved, cidr(infra).containsCIDR(reserved) || cidr(reserved).containsCIDR(infra)))", message="infrastructureSubnets and reservedSubnets must not overlap" +// +kubebuilder:validation:XValidation:rule="!has(self.infrastructureSubnets) || self.infrastructureSubnets.all(s, isCIDR(s) && cidr(s) == cidr(s).masked())", message="infrastructureSubnets must be a masked network address (no host bits set)" +// +kubebuilder:validation:XValidation:rule="!has(self.reservedSubnets) || self.reservedSubnets.all(s, isCIDR(s) && cidr(s) == cidr(s).masked())", message="reservedSubnets must be a masked network address (no host bits set)" type Layer2Config struct { // Role describes the network role in the pod. 
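A note on the validation markers added above: the new `XValidation` rules on `Layer2Config` (mirrored in the generated CRD templates) require every `infrastructureSubnets` and `reservedSubnets` entry to be a masked network address, i.e. `isCIDR(s) && cidr(s) == cidr(s).masked()`, so values with host bits set are rejected at admission time. The snippet below is illustrative only and not part of this change; `isMaskedCIDR` is a made-up helper showing the equivalent check in Go, useful for sanity-checking a UDN/CUDN manifest before applying it.

```go
package main

import (
	"fmt"
	"net"
)

// isMaskedCIDR reports whether s is a CIDR whose address is the network
// address (no host bits set), mirroring the CEL rule
// `isCIDR(s) && cidr(s) == cidr(s).masked()`.
func isMaskedCIDR(s string) bool {
	ip, ipNet, err := net.ParseCIDR(s)
	if err != nil {
		return false
	}
	return ip.Equal(ipNet.IP)
}

func main() {
	// "10.100.0.0/16" and "2001:db8::/64" pass; "10.100.0.1/16" is rejected
	// because it has host bits set.
	for _, s := range []string{"10.100.0.0/16", "10.100.0.1/16", "2001:db8::/64"} {
		fmt.Printf("%-16s masked=%v\n", s, isMaskedCIDR(s))
	}
}
```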
// diff --git a/go-controller/pkg/node/bridgeconfig/bridgeflows.go b/go-controller/pkg/node/bridgeconfig/bridgeflows.go index 76bc6ef597..78cf8a2c42 100644 --- a/go-controller/pkg/node/bridgeconfig/bridgeflows.go +++ b/go-controller/pkg/node/bridgeconfig/bridgeflows.go @@ -411,9 +411,9 @@ func (b *BridgeConfiguration) flowsForDefaultBridge(extraIPs []net.IP) ([]string dftFlows = append(dftFlows, fmt.Sprintf("cookie=%s, priority=250, table=2, %s, pkt_mark=%s, "+ - "actions=set_field:%s->eth_dst,output:%s", + "actions=set_field:%s->eth_dst,%soutput:%s", nodetypes.DefaultOpenFlowCookie, protoPrefixV4, netConfig.PktMark, - bridgeMacAddress, netConfig.OfPortPatch)) + bridgeMacAddress, mod_vlan_id, netConfig.OfPortPatch)) } } @@ -448,9 +448,9 @@ func (b *BridgeConfiguration) flowsForDefaultBridge(extraIPs []net.IP) ([]string netConfig.V6MasqIPs.ManagementPort.IP.String())) dftFlows = append(dftFlows, fmt.Sprintf("cookie=%s, priority=250, table=2, %s, pkt_mark=%s, "+ - "actions=set_field:%s->eth_dst,output:%s", + "actions=set_field:%s->eth_dst,%soutput:%s", nodetypes.DefaultOpenFlowCookie, protoPrefixV6, netConfig.PktMark, - bridgeMacAddress, netConfig.OfPortPatch)) + bridgeMacAddress, mod_vlan_id, netConfig.OfPortPatch)) } } diff --git a/go-controller/pkg/node/controllers/egressip/egressip.go b/go-controller/pkg/node/controllers/egressip/egressip.go index 3c6b340bbf..dd1f15f3c9 100644 --- a/go-controller/pkg/node/controllers/egressip/egressip.go +++ b/go-controller/pkg/node/controllers/egressip/egressip.go @@ -45,6 +45,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/syncmap" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/egressip" utilerrors "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/errors" ) @@ -539,15 +540,15 @@ func (c *Controller) processEIP(eip *eipv1.EgressIP) (*eIPConfig, sets.Set[strin if isValid := isEIPStatusItemValid(status, c.nodeName); !isValid { continue } - eIPNet, err := util.GetIPNetFullMask(status.EgressIP) - if err != nil { + ip := net.ParseIP(status.EgressIP) + if ip == nil { return nil, selectedNamespaces, selectedPods, selectedNamespacesPodIPs, - fmt.Errorf("failed to generate mask for EgressIP %s IP %s: %v", eip.Name, status.EgressIP, err) + fmt.Errorf("failed to parse EgressIP %s IP %s", eip.Name, status.EgressIP) } - if util.IsOVNNetwork(parsedNodeEIPConfig, eIPNet.IP) { + if util.IsOVNNetwork(parsedNodeEIPConfig, ip) { continue } - found, link, err := findLinkOnSameNetworkAsIP(eIPNet.IP, c.v4, c.v6) + found, link, err := findLinkOnSameNetworkAsIP(ip, c.v4, c.v6) if err != nil { return nil, selectedNamespaces, selectedPods, selectedNamespacesPodIPs, fmt.Errorf("failed to find a network to host EgressIP %s IP %s: %v", eip.Name, status.EgressIP, err) @@ -560,7 +561,7 @@ func (c *Controller) processEIP(eip *eipv1.EgressIP) (*eIPConfig, sets.Set[strin if err != nil { return nil, selectedNamespaces, selectedPods, selectedNamespacesPodIPs, fmt.Errorf("failed to list namespaces: %w", err) } - isEIPV6 := utilnet.IsIPv6(eIPNet.IP) + isEIPV6 := utilnet.IsIPv6(ip) for _, namespace := range namespaces { netInfo, err := c.getActiveNetworkForNamespace(namespace.Name) if err != nil { @@ -593,13 +594,13 @@ func (c *Controller) processEIP(eip *eipv1.EgressIP) (*eIPConfig, sets.Set[strin if selectedNamespacesPodIPs[namespace.Name] == nil { selectedNamespacesPodIPs[namespace.Name] = 
make(map[ktypes.NamespacedName]*podIPConfigList) } - selectedNamespacesPodIPs[namespace.Name][podNamespaceName] = generatePodConfig(ips, link, eIPNet, isEIPV6) + selectedNamespacesPodIPs[namespace.Name][podNamespaceName] = generatePodConfig(ips, link, ip, isEIPV6) selectedPods.Insert(podNamespaceName) } } // ensure at least one pod is selected before generating config if len(selectedNamespacesPodIPs) > 0 { - eipSpecificConfig, err = generateEIPConfig(link, eIPNet, isEIPV6) + eipSpecificConfig, err = generateEIPConfig(link, ip, isEIPV6) if err != nil { return nil, selectedNamespaces, selectedPods, selectedNamespacesPodIPs, fmt.Errorf("failed to generate EIP configuration for EgressIP %s IP %s: %v", eip.Name, status.EgressIP, err) @@ -611,7 +612,7 @@ func (c *Controller) processEIP(eip *eipv1.EgressIP) (*eIPConfig, sets.Set[strin return eipSpecificConfig, selectedNamespaces, selectedPods, selectedNamespacesPodIPs, nil } -func generatePodConfig(podIPs []net.IP, link netlink.Link, eIPNet *net.IPNet, isEIPV6 bool) *podIPConfigList { +func generatePodConfig(podIPs []net.IP, link netlink.Link, eIP net.IP, isEIPV6 bool) *podIPConfigList { newPodIPConfigs := newPodIPConfigList() for _, podIP := range podIPs { isPodIPv6 := utilnet.IsIPv6(podIP) @@ -619,7 +620,7 @@ func generatePodConfig(podIPs []net.IP, link netlink.Link, eIPNet *net.IPNet, is continue } ipConfig := newPodIPConfig() - ipConfig.ipTableRule = generateIPTablesSNATRuleArg(podIP, isPodIPv6, link.Attrs().Name, eIPNet.IP.String()) + ipConfig.ipTableRule = generateIPTablesSNATRuleArg(podIP, isPodIPv6, link.Attrs().Name, eIP.String()) ipConfig.ipRule = generateIPRule(podIP, isPodIPv6, link.Attrs().Index) ipConfig.v6 = isPodIPv6 newPodIPConfigs.elems = append(newPodIPConfigs.elems, ipConfig) @@ -628,14 +629,14 @@ func generatePodConfig(podIPs []net.IP, link netlink.Link, eIPNet *net.IPNet, is } // generateEIPConfig generates configuration that isn't related to any pod EIPs to support config of a single EIP -func generateEIPConfig(link netlink.Link, eIPNet *net.IPNet, isEIPV6 bool) (*eIPConfig, error) { +func generateEIPConfig(link netlink.Link, eIP net.IP, isEIPV6 bool) (*eIPConfig, error) { eipConfig := newEIPConfig() linkRoutes, err := generateRoutesForLink(link, isEIPV6) if err != nil { return nil, err } eipConfig.routes = linkRoutes - eipConfig.addr = getNetlinkAddress(eIPNet, link.Attrs().Index) + eipConfig.addr = egressip.GetNetlinkAddress(eIP, link.Attrs().Index) return eipConfig, nil } @@ -1482,14 +1483,6 @@ func isLinkUp(flags string) bool { return strings.Contains(flags, "up") } -func getNetlinkAddress(addr *net.IPNet, ifindex int) *netlink.Addr { - return &netlink.Addr{ - IPNet: addr, - Scope: int(netlink.SCOPE_UNIVERSE), - LinkIndex: ifindex, - } -} - // generateIPRules generates IP rules at a predefined priority for each pod IP with a custom routing table based // from the links 'ifindex' func generateIPRule(srcIP net.IP, isIPv6 bool, ifIndex int) netlink.Rule { diff --git a/go-controller/pkg/node/default_node_network_controller.go b/go-controller/pkg/node/default_node_network_controller.go index 9e0d28b582..e3923fcc54 100644 --- a/go-controller/pkg/node/default_node_network_controller.go +++ b/go-controller/pkg/node/default_node_network_controller.go @@ -153,7 +153,7 @@ func newDefaultNodeNetworkController(cnnci *CommonNodeNetworkControllerInfo, sto routeManager: routeManager, ovsClient: ovsClient, } - if util.IsNetworkSegmentationSupportEnabled() { + if util.IsNetworkSegmentationSupportEnabled() && config.OvnKubeNode.Mode != 
types.NodeModeDPU { c.udnHostIsolationManager = NewUDNHostIsolationManager(config.IPv4Mode, config.IPv6Mode, cnnci.watchFactory.PodCoreInformer(), cnnci.name, cnnci.recorder) } @@ -936,6 +936,9 @@ func (nc *DefaultNodeNetworkController) Init(ctx context.Context) error { if err != nil { return err } + } + + if config.OvnKubeNode.Mode != types.NodeModeDPU { if nc.udnHostIsolationManager != nil { if err = nc.udnHostIsolationManager.Start(ctx); err != nil { return err @@ -980,9 +983,11 @@ func (nc *DefaultNodeNetworkController) Init(ctx context.Context) error { nodeAnnotator := kube.NewNodeAnnotator(nc.Kube, node.Name) - // Use the device from environment when the DP resource name is specified. - if err := configureMgmtPortNetdevFromResource(); err != nil { - return err + if config.OvnKubeNode.Mode != types.NodeModeDPU { + // Use the device from environment when the DP resource name is specified. + if err := configureMgmtPortNetdevFromResource(); err != nil { + return err + } } if config.OvnKubeNode.Mode == types.NodeModeDPUHost { @@ -1043,6 +1048,11 @@ func (nc *DefaultNodeNetworkController) Init(ctx context.Context) error { return err } nc.Gateway = gw + } else { + err = nc.initGatewayDPUHostPreStart(nc.nodeAddress, nodeAnnotator) + if err != nil { + return err + } } if err := level.Set(strconv.Itoa(config.Logging.Level)); err != nil { @@ -1081,7 +1091,7 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error { // Complete gateway initialization if config.OvnKubeNode.Mode == types.NodeModeDPUHost { - err = nc.initGatewayDPUHost(nc.nodeAddress, nodeAnnotator) + err = nc.initGatewayDPUHost() if err != nil { return err } diff --git a/go-controller/pkg/node/egressip/gateway_egressip.go b/go-controller/pkg/node/egressip/gateway_egressip.go index 38bd2b058e..27700e026e 100644 --- a/go-controller/pkg/node/egressip/gateway_egressip.go +++ b/go-controller/pkg/node/egressip/gateway_egressip.go @@ -3,13 +3,9 @@ package egressip import ( "encoding/json" "fmt" - "math" "net" "sync" - "github.com/vishvananda/netlink" - "golang.org/x/sys/unix" - "k8s.io/apimachinery/pkg/util/sets" corev1informers "k8s.io/client-go/informers/core/v1" corev1listers "k8s.io/client-go/listers/core/v1" @@ -23,6 +19,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/linkmanager" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/egressip" ) // markIPs contains packet mark and associated EgressIP IP for IPv4 / IPv6. 
Key is packet mark, value egress IP @@ -126,6 +123,7 @@ func (mic *MarkIPsCache) deleteMarkIP(pktMark util.EgressIPMark, ip net.IP) { func (mic *MarkIPsCache) replaceAll(markIPs markIPs) { mic.mu.Lock() mic.markToIPs = markIPs + mic.IPToMark = make(map[string]int, len(markIPs.v4)+len(markIPs.v6)) for mark, ipv4 := range markIPs.v4 { mic.IPToMark[ipv4] = mark } @@ -451,7 +449,7 @@ func (g *BridgeEIPAddrManager) addIPBridge(ip net.IP) error { if err != nil { return fmt.Errorf("failed to get link obj by name %s: %v", g.bridgeName, err) } - return g.addrManager.AddAddress(getEIPBridgeNetlinkAddress(ip, link.Attrs().Index)) + return g.addrManager.AddAddress(*egressip.GetNetlinkAddress(ip, link.Attrs().Index)) } func (g *BridgeEIPAddrManager) deleteIPBridge(ip net.IP) error { @@ -459,7 +457,7 @@ func (g *BridgeEIPAddrManager) deleteIPBridge(ip net.IP) error { if err != nil { return fmt.Errorf("failed to get link obj by name %s: %v", g.bridgeName, err) } - return g.addrManager.DelAddress(getEIPBridgeNetlinkAddress(ip, link.Attrs().Index)) + return g.addrManager.DelAddress(*egressip.GetNetlinkAddress(ip, link.Attrs().Index)) } // getAnnotationIPs retrieves the egress IP annotation from the current node Nodes object. If multiple users, callers must synchronise. @@ -514,29 +512,3 @@ func getIPsStr(ips ...net.IP) []string { } return ipsStr } - -func getEIPBridgeNetlinkAddress(ip net.IP, ifindex int) netlink.Addr { - return netlink.Addr{ - IPNet: &net.IPNet{IP: ip, Mask: util.GetIPFullMask(ip)}, - Flags: getEIPNetlinkAddressFlag(ip), - Scope: int(netlink.SCOPE_UNIVERSE), - ValidLft: getEIPNetlinkAddressValidLft(ip), - LinkIndex: ifindex, - } -} - -func getEIPNetlinkAddressFlag(ip net.IP) int { - // isV6? - if ip.To4() == nil && ip.To16() != nil { - return unix.IFA_F_NODAD - } - return 0 -} - -func getEIPNetlinkAddressValidLft(ip net.IP) int { - // isV6? 
- if ip.To4() == nil && ip.To16() != nil { - return math.MaxUint32 - } - return 0 -} diff --git a/go-controller/pkg/node/egressip/gateway_egressip_test.go b/go-controller/pkg/node/egressip/gateway_egressip_test.go index 07a03a87b6..1fe48a6f5b 100644 --- a/go-controller/pkg/node/egressip/gateway_egressip_test.go +++ b/go-controller/pkg/node/egressip/gateway_egressip_test.go @@ -20,6 +20,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/linkmanager" netlink_mocks "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/mocks/github.com/vishvananda/netlink" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/egressip" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/mocks" ) @@ -63,7 +64,7 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { nlMock.On("LinkByIndex", bridgeLinkIndex).Return(nlLinkMock, nil) nlMock.On("LinkList").Return([]netlink.Link{nlLinkMock}, nil) nlMock.On("AddrList", nlLinkMock, 0).Return([]netlink.Addr{}, nil) - nlMock.On("AddrAdd", nlLinkMock, getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr), bridgeLinkIndex)).Return(nil) + nlMock.On("AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex)).Return(nil) addrMgr, stopFn := initBridgeEIPAddrManager(nodeName, bridgeName, emptyAnnotation) defer stopFn() eip := getEIPAssignedToNode(nodeName, mark, ipV4Addr) @@ -74,11 +75,11 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") gomega.Expect(parseEIPsFromAnnotation(node)).Should(gomega.ConsistOf(ipV4Addr)) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, - getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) + egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) }) ginkgo.It("doesn't configure or fail when annotation mark isn't found", func() { - nlMock.On("AddrAdd", nlLinkMock, getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr), bridgeLinkIndex)).Return(nil) + nlMock.On("AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex)).Return(nil) addrMgr, stopFn := initBridgeEIPAddrManager(nodeName, bridgeName, emptyAnnotation) defer stopFn() eip := getEIPAssignedToNode(nodeName, "", ipV4Addr) @@ -89,11 +90,11 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") gomega.Expect(parseEIPsFromAnnotation(node)).ShouldNot(gomega.ConsistOf(ipV4Addr)) gomega.Expect(nlMock.AssertNotCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, - getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) + egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) }) ginkgo.It("fails when invalid annotation mark", func() { - nlMock.On("AddrAdd", nlLinkMock, getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr), bridgeLinkIndex)).Return(nil) + nlMock.On("AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex)).Return(nil) addrMgr, stopFn := initBridgeEIPAddrManager(nodeName, bridgeName, emptyAnnotation) defer stopFn() eip := getEIPAssignedToNode(nodeName, "not-an-integer", ipV4Addr) @@ -104,7 +105,7 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") 
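The test changes in this file (together with the changes in `gateway_egressip.go` and `pkg/node/controllers/egressip`) replace the removed local helpers `getEIPBridgeNetlinkAddress`, `getEIPNetlinkAddressFlag` and `getEIPNetlinkAddressValidLft` with a single shared `egressip.GetNetlinkAddress` in `pkg/util/egressip`. Its implementation is not part of this diff; judging by the deleted code and the call sites that now use it, it plausibly looks like the sketch below (a reconstruction under those assumptions, not the actual source).

```go
package egressip

import (
	"math"
	"net"

	"github.com/vishvananda/netlink"
	"golang.org/x/sys/unix"
)

// GetNetlinkAddress builds the netlink address used when plumbing an egress
// IP onto a link: a full-mask (/32 or /128) address with universe scope.
// For IPv6 it additionally disables duplicate address detection and pins the
// maximum valid lifetime, matching the removed bridge-specific helper.
func GetNetlinkAddress(ip net.IP, ifindex int) *netlink.Addr {
	isV6 := ip.To4() == nil && ip.To16() != nil
	mask := net.CIDRMask(32, 32)
	flags, validLft := 0, 0
	if isV6 {
		mask = net.CIDRMask(128, 128)
		flags = unix.IFA_F_NODAD
		validLft = math.MaxUint32
	}
	return &netlink.Addr{
		IPNet:     &net.IPNet{IP: ip, Mask: mask},
		Flags:     flags,
		Scope:     int(netlink.SCOPE_UNIVERSE),
		ValidLft:  validLft,
		LinkIndex: ifindex,
	}
}
```

Call sites then pass the returned pointer straight to `AddrAdd`/`AddrDel`, or dereference it where a value is expected, as the hunks in this file show.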
gomega.Expect(parseEIPsFromAnnotation(node)).ShouldNot(gomega.ConsistOf(ipV4Addr)) gomega.Expect(nlMock.AssertNotCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, - getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) + egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) }) ginkgo.It("configures annotations with existing entries", func() { @@ -113,7 +114,7 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { nlMock.On("LinkByIndex", bridgeLinkIndex).Return(nlLinkMock, nil) nlMock.On("LinkList").Return([]netlink.Link{nlLinkMock}, nil) nlMock.On("AddrList", nlLinkMock, 0).Return([]netlink.Addr{}, nil) - nlMock.On("AddrAdd", nlLinkMock, getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr), bridgeLinkIndex)).Return(nil) + nlMock.On("AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex)).Return(nil) addrMgr, stopFn := initBridgeEIPAddrManager(nodeName, bridgeName, generateAnnotFromIPs(ipV4Addr2)) defer stopFn() eip := getEIPAssignedToNode(nodeName, mark, ipV4Addr) @@ -124,7 +125,7 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") gomega.Expect(parseEIPsFromAnnotation(node)).Should(gomega.ConsistOf(ipV4Addr, ipV4Addr2)) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, - getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) + egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) }) }) @@ -135,7 +136,7 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { nlMock.On("LinkByIndex", bridgeLinkIndex).Return(nlLinkMock, nil) nlMock.On("LinkList").Return([]netlink.Link{nlLinkMock}, nil) nlMock.On("AddrList", nlLinkMock, 0).Return([]netlink.Addr{}, nil) - nlMock.On("AddrAdd", nlLinkMock, getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr), bridgeLinkIndex)).Return(nil) + nlMock.On("AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex)).Return(nil) addrMgr, stopFn := initBridgeEIPAddrManager(nodeName, bridgeName, emptyAnnotation) defer stopFn() assignedEIP := getEIPAssignedToNode(nodeName, mark, ipV4Addr) @@ -147,7 +148,7 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") gomega.Expect(parseEIPsFromAnnotation(node)).Should(gomega.ConsistOf(ipV4Addr)) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, - getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) + egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) }) ginkgo.It("removes EgressIP previously assigned", func() { @@ -156,8 +157,8 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { nlMock.On("LinkByIndex", bridgeLinkIndex).Return(nlLinkMock, nil) nlMock.On("LinkList").Return([]netlink.Link{nlLinkMock}, nil) nlMock.On("AddrList", nlLinkMock, 0).Return([]netlink.Addr{}, nil) - nlMock.On("AddrAdd", nlLinkMock, getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr), bridgeLinkIndex)).Return(nil) - nlMock.On("AddrDel", nlLinkMock, getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr), bridgeLinkIndex)).Return(nil) + nlMock.On("AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex)).Return(nil) + nlMock.On("AddrDel", nlLinkMock, 
egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex)).Return(nil) addrMgr, stopFn := initBridgeEIPAddrManager(nodeName, bridgeName, emptyAnnotation) defer stopFn() assignedEIP := getEIPAssignedToNode(nodeName, mark, ipV4Addr) @@ -172,9 +173,9 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") gomega.Expect(parseEIPsFromAnnotation(node)).ShouldNot(gomega.ConsistOf(ipV4Addr)) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, - getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) + egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrDel", nlLinkMock, - getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) + egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) }) ginkgo.It("reconfigures from an old to a new IP", func() { @@ -183,9 +184,9 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { nlMock.On("LinkByIndex", bridgeLinkIndex).Return(nlLinkMock, nil) nlMock.On("LinkList").Return([]netlink.Link{nlLinkMock}, nil) nlMock.On("AddrList", nlLinkMock, 0).Return([]netlink.Addr{}, nil) - nlMock.On("AddrAdd", nlLinkMock, getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr), bridgeLinkIndex)).Return(nil) - nlMock.On("AddrAdd", nlLinkMock, getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr2), bridgeLinkIndex)).Return(nil) - nlMock.On("AddrDel", nlLinkMock, getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr), bridgeLinkIndex)).Return(nil) + nlMock.On("AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex)).Return(nil) + nlMock.On("AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr2), bridgeLinkIndex)).Return(nil) + nlMock.On("AddrDel", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex)).Return(nil) addrMgr, stopFn := initBridgeEIPAddrManager(nodeName, bridgeName, emptyAnnotation) defer stopFn() unassignedEIP := getEIPNotAssignedToNode(mark, ipV4Addr) @@ -201,11 +202,11 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") gomega.Expect(parseEIPsFromAnnotation(node)).Should(gomega.ConsistOf(ipV4Addr2)) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, - getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) + egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, - getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr2), bridgeLinkIndex))).Should(gomega.BeTrue()) + egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr2), bridgeLinkIndex))).Should(gomega.BeTrue()) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrDel", nlLinkMock, - getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) + egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) }) }) @@ -216,8 +217,8 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { nlMock.On("LinkByIndex", bridgeLinkIndex).Return(nlLinkMock, nil) nlMock.On("LinkList").Return([]netlink.Link{nlLinkMock}, nil) nlMock.On("AddrList", nlLinkMock, 0).Return([]netlink.Addr{}, nil) - 
nlMock.On("AddrAdd", nlLinkMock, getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr), bridgeLinkIndex)).Return(nil) - nlMock.On("AddrDel", nlLinkMock, getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr), bridgeLinkIndex)).Return(nil) + nlMock.On("AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex)).Return(nil) + nlMock.On("AddrDel", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex)).Return(nil) addrMgr, stopFn := initBridgeEIPAddrManager(nodeName, bridgeName, emptyAnnotation) defer stopFn() eip := getEIPAssignedToNode(nodeName, mark, ipV4Addr) @@ -231,9 +232,9 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") gomega.Expect(parseEIPsFromAnnotation(node)).ShouldNot(gomega.ConsistOf(ipV4Addr)) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, - getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) + egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrDel", nlLinkMock, - getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) + egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) }) ginkgo.It("does not update when EIP is deleted that wasn't assigned to the node", func() { @@ -247,7 +248,7 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") gomega.Expect(parseEIPsFromAnnotation(node)).Should(gomega.ConsistOf(ipV4Addr2)) gomega.Expect(nlMock.AssertNotCalled(ginkgo.GinkgoT(), "AddrDel", nlLinkMock, - getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) + egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) }) }) @@ -258,8 +259,8 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { nlMock.On("LinkByIndex", bridgeLinkIndex).Return(nlLinkMock, nil) nlMock.On("LinkList").Return([]netlink.Link{nlLinkMock}, nil) nlMock.On("AddrList", nlLinkMock, 0).Return([]netlink.Addr{}, nil) - nlMock.On("AddrAdd", nlLinkMock, getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr), bridgeLinkIndex)).Return(nil) - nlMock.On("AddrAdd", nlLinkMock, getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr2), bridgeLinkIndex)).Return(nil) + nlMock.On("AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex)).Return(nil) + nlMock.On("AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr2), bridgeLinkIndex)).Return(nil) addrMgr, stopFn := initBridgeEIPAddrManager(nodeName, bridgeName, emptyAnnotation) defer stopFn() eipAssigned1 := getEIPAssignedToNode(nodeName, mark, ipV4Addr) @@ -271,9 +272,9 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") gomega.Expect(parseEIPsFromAnnotation(node)).Should(gomega.ConsistOf(ipV4Addr, ipV4Addr2)) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, - getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) + egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, - 
getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr2), bridgeLinkIndex))).Should(gomega.BeTrue()) + egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr2), bridgeLinkIndex))).Should(gomega.BeTrue()) }) ginkgo.It("delete previous configuration", func() { @@ -282,9 +283,9 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { nlMock.On("LinkByIndex", bridgeLinkIndex).Return(nlLinkMock, nil) nlMock.On("LinkList").Return([]netlink.Link{nlLinkMock}, nil) nlMock.On("AddrList", nlLinkMock, 0).Return([]netlink.Addr{}, nil) - nlMock.On("AddrAdd", nlLinkMock, getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr), bridgeLinkIndex)).Return(nil) - nlMock.On("AddrAdd", nlLinkMock, getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr2), bridgeLinkIndex)).Return(nil) - nlMock.On("AddrDel", nlLinkMock, getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr3), bridgeLinkIndex)).Return(nil) + nlMock.On("AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex)).Return(nil) + nlMock.On("AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr2), bridgeLinkIndex)).Return(nil) + nlMock.On("AddrDel", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr3), bridgeLinkIndex)).Return(nil) addrMgr, stopFn := initBridgeEIPAddrManager(nodeName, bridgeName, generateAnnotFromIPs(ipV4Addr3)) // previously configured IP defer stopFn() eipAssigned1 := getEIPAssignedToNode(nodeName, mark, ipV4Addr) @@ -295,11 +296,11 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") gomega.Expect(parseEIPsFromAnnotation(node)).Should(gomega.ConsistOf(ipV4Addr, ipV4Addr2)) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, - getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) + egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, - getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr2), bridgeLinkIndex))).Should(gomega.BeTrue()) + egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr2), bridgeLinkIndex))).Should(gomega.BeTrue()) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrDel", nlLinkMock, - getEIPBridgeNetlinkAddressPtr(net.ParseIP(ipV4Addr3), bridgeLinkIndex))).Should(gomega.BeTrue()) + egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr3), bridgeLinkIndex))).Should(gomega.BeTrue()) }) ginkgo.It("no update or failure when mark is not set", func() { @@ -387,11 +388,6 @@ func generateAnnotFromIPs(ips ...string) string { return fmt.Sprintf("[%s]", strings.Join(ipsWithQuotes, ",")) } -func getEIPBridgeNetlinkAddressPtr(ip net.IP, ifindex int) *netlink.Addr { - addr := getEIPBridgeNetlinkAddress(ip, ifindex) - return &addr -} - func parseEIPsFromAnnotation(node *corev1.Node) []string { ips, err := util.ParseNodeBridgeEgressIPsAnnotation(node) if err != nil { diff --git a/go-controller/pkg/node/gateway.go b/go-controller/pkg/node/gateway.go index 97e7baeecb..4a7416c64e 100644 --- a/go-controller/pkg/node/gateway.go +++ b/go-controller/pkg/node/gateway.go @@ -429,15 +429,17 @@ func gatewayInitInternal(nodeName, gwIntf, egressGatewayIntf string, gwNextHops } } - // Set static FDB entry for sharedGW MAC. - // If `GatewayIfaceRep` port is present, use it instead of LOCAL (bridge name). - gwport := gatewayBridge.GetBridgeName() // Default is LOCAL port for the bridge. 
- if repPort := gatewayBridge.GetGatewayIfaceRep(); repPort != "" { // We have an accelerated switchdev device for GW. - gwport = repPort - } + if config.OvnKubeNode.Mode != types.NodeModeDPUHost { + // Set static FDB entry for sharedGW MAC. + // If `GatewayIfaceRep` port is present, use it instead of LOCAL (bridge name). + gwport := gatewayBridge.GetBridgeName() // Default is LOCAL port for the bridge. + if repPort := gatewayBridge.GetGatewayIfaceRep(); repPort != "" { // We have an accelerated switchdev device for GW. + gwport = repPort + } - if err := util.SetStaticFDBEntry(gatewayBridge.GetBridgeName(), gwport, gatewayBridge.GetMAC()); err != nil { - return nil, nil, err + if err := util.SetStaticFDBEntry(gatewayBridge.GetBridgeName(), gwport, gatewayBridge.GetMAC()); err != nil { + return nil, nil, err + } } l3GwConfig := util.L3GatewayConfig{ @@ -466,26 +468,46 @@ func (g *gateway) GetGatewayBridgeIface() string { } func (g *gateway) GetGatewayIface() string { - return g.openflowManager.defaultBridge.GetGatewayIface() + if config.OvnKubeNode.Mode != types.NodeModeDPUHost { + if g.openflowManager == nil { + return "" + } + return g.openflowManager.defaultBridge.GetGatewayIface() + } else { + return config.Gateway.Interface + } } // SetDefaultGatewayBridgeMAC updates the mac address for the OFM used to render flows with func (g *gateway) SetDefaultGatewayBridgeMAC(macAddr net.HardwareAddr) { + if config.OvnKubeNode.Mode == types.NodeModeDPUHost { + return + } g.openflowManager.setDefaultBridgeMAC(macAddr) klog.Infof("Default gateway bridge MAC address updated to %s", macAddr) } func (g *gateway) SetDefaultPodNetworkAdvertised(isPodNetworkAdvertised bool) { + if config.OvnKubeNode.Mode == types.NodeModeDPUHost { + return + } g.openflowManager.defaultBridge.GetNetworkConfig(types.DefaultNetworkName).Advertised.Store(isPodNetworkAdvertised) } func (g *gateway) GetDefaultPodNetworkAdvertised() bool { + if config.OvnKubeNode.Mode == types.NodeModeDPUHost { + return false + } return g.openflowManager.defaultBridge.GetNetworkConfig(types.DefaultNetworkName).Advertised.Load() } // SetDefaultBridgeGARPDropFlows will enable flows to drop GARPs if the openflow // manager has been initialized. func (g *gateway) SetDefaultBridgeGARPDropFlows(isDropped bool) { + if config.OvnKubeNode.Mode == types.NodeModeDPUHost { + return + } + if g.openflowManager == nil { return } @@ -495,14 +517,22 @@ func (g *gateway) SetDefaultBridgeGARPDropFlows(isDropped bool) { // Reconcile handles triggering updates to different components of a gateway, like OFM, Services func (g *gateway) Reconcile() error { klog.Info("Reconciling gateway with updates") - if err := g.openflowManager.updateBridgeFlowCache(g.nodeIPManager.ListAddresses()); err != nil { - return err + if config.OvnKubeNode.Mode != types.NodeModeDPUHost { + if g.openflowManager != nil { + if err := g.openflowManager.updateBridgeFlowCache(g.nodeIPManager.ListAddresses()); err != nil { + return err + } + // let's sync these flows immediately + g.openflowManager.requestFlowSync() + } } - // let's sync these flows immediately - g.openflowManager.requestFlowSync() - err := g.updateSNATRules() - if err != nil { - return err + // TBD updateSNATRules() gets node host-cidr by accessing gateway.nodeIPManager, which does not + // exist in dpu-host mode. 
+ if config.OvnKubeNode.Mode == types.NodeModeFull { + err := g.updateSNATRules() + if err != nil { + return err + } } // Services create OpenFlow flows as well, need to update them all if g.servicesRetryFramework != nil { diff --git a/go-controller/pkg/node/gateway_init.go b/go-controller/pkg/node/gateway_init.go index b4d11d69cf..659933d419 100644 --- a/go-controller/pkg/node/gateway_init.go +++ b/go-controller/pkg/node/gateway_init.go @@ -384,13 +384,9 @@ func interfaceForEXGW(intfName string) string { return intfName } -func (nc *DefaultNodeNetworkController) initGatewayDPUHost(kubeNodeIP net.IP, nodeAnnotator kube.Annotator) error { - // A DPU host gateway is complementary to the shared gateway running - // on the DPU embedded CPU. it performs some initializations and - // watch on services for iptable rule updates and run a loadBalancerHealth checker - // Note: all K8s Node related annotations are handled from DPU. - klog.Info("Initializing Shared Gateway Functionality on DPU host") - var err error +// TODO(adrianc): revisit if support for nodeIPManager is needed. +func (nc *DefaultNodeNetworkController) initGatewayDPUHostPreStart(kubeNodeIP net.IP, nodeAnnotator kube.Annotator) error { + klog.Info("Initializing Shared Gateway Functionality for Gateway PreStart on DPU host") // Find the network interface that has the Kubernetes node IP assigned to it // This interface will be used for DPU host gateway operations @@ -470,18 +466,43 @@ func (nc *DefaultNodeNetworkController) initGatewayDPUHost(kubeNodeIP net.IP, no return err } - gw := &gateway{ + if err = addHostMACBindings(kubeIntf); err != nil { + return fmt.Errorf("failed to add MAC bindings for service routing: %w", err) + } + + gatewayNextHops, _, err := getGatewayNextHops() + if err != nil { + return err + } + + nc.Gateway = &gateway{ initFunc: func() error { return nil }, readyFunc: func() (bool, error) { return true, nil }, watchFactory: nc.watchFactory.(*factory.WatchFactory), + nextHops: gatewayNextHops, } + return nil +} - // TODO(adrianc): revisit if support for nodeIPManager is needed. +func (nc *DefaultNodeNetworkController) initGatewayDPUHost() error { + // A DPU host gateway is complementary to the shared gateway running + // on the DPU embedded CPU. it performs some initializations and + // watch on services for iptable rule updates and run a loadBalancerHealth checker + // Note: all K8s Node related annotations are handled from DPU. + klog.Info("Initializing Shared Gateway Functionality for Gateway Start on DPU host") + var err error + // TODO(adrianc): revisit if support for nodeIPManager is needed. 
+ gw := nc.Gateway.(*gateway) if config.Gateway.NodeportEnable { if err := initSharedGatewayIPTables(); err != nil { return err } + if util.IsNetworkSegmentationSupportEnabled() { + if err := configureUDNServicesNFTables(); err != nil { + return fmt.Errorf("unable to configure UDN nftables: %w", err) + } + } gw.nodePortWatcherIptables = newNodePortWatcherIptables(nc.networkManager) gw.loadBalancerHealthChecker = newLoadBalancerHealthChecker(nc.name, nc.watchFactory) portClaimWatcher, err := newPortClaimWatcher(nc.recorder) @@ -491,10 +512,6 @@ func (nc *DefaultNodeNetworkController) initGatewayDPUHost(kubeNodeIP net.IP, no gw.portClaimWatcher = portClaimWatcher } - if err := addHostMACBindings(kubeIntf); err != nil { - return fmt.Errorf("failed to add MAC bindings for service routing") - } - err = gw.Init(nc.stopChan, nc.wg) nc.Gateway = gw return err diff --git a/go-controller/pkg/node/gateway_init_linux_test.go b/go-controller/pkg/node/gateway_init_linux_test.go index 7e1f330937..b46657c5c2 100644 --- a/go-controller/pkg/node/gateway_init_linux_test.go +++ b/go-controller/pkg/node/gateway_init_linux_test.go @@ -925,7 +925,9 @@ func shareGatewayInterfaceDPUHostTest(app *cli.App, testNS ns.NetNS, uplinkName, nodeAnnotator := kube.NewNodeAnnotator(k, existingNode.Name) - err := nc.initGatewayDPUHost(net.ParseIP(hostIP), nodeAnnotator) + err := nc.initGatewayDPUHostPreStart(net.ParseIP(hostIP), nodeAnnotator) + Expect(err).NotTo(HaveOccurred()) + err = nc.initGatewayDPUHost() Expect(err).NotTo(HaveOccurred()) link, err := netlink.LinkByName(uplinkName) diff --git a/go-controller/pkg/node/gateway_shared_intf.go b/go-controller/pkg/node/gateway_shared_intf.go index 5d1d94cba6..773671b0ff 100644 --- a/go-controller/pkg/node/gateway_shared_intf.go +++ b/go-controller/pkg/node/gateway_shared_intf.go @@ -405,6 +405,11 @@ func (npw *nodePortWatcher) updateServiceFlowCache(service *corev1.Service, netI defaultNetConfig := npw.ofm.defaultBridge.GetActiveNetworkBridgeConfigCopy(types.DefaultNetworkName) var flows []string clusterIPs := util.GetClusterIPs(service) + outputActions := fmt.Sprintf("output:%s", defaultNetConfig.OfPortPatch) + if config.Gateway.VLANID != 0 { + outputActions = fmt.Sprintf("mod_vlan_vid:%d,%s", config.Gateway.VLANID, outputActions) + } + for _, clusterIP := range clusterIPs { ipPrefix := protoPrefixV4 if utilnet.IsIPv6String(clusterIP) { @@ -415,9 +420,9 @@ func (npw *nodePortWatcher) updateServiceFlowCache(service *corev1.Service, netI // ip,nw_dst=10.96.0.1 actions=mod_dl_dst:02:42:ac:12:00:03,output:"patch-breth0_ov" // This flow is used for UDNs and advertised UDNs to be able to reach kapi and dns services alone on default network flows = append(flows, fmt.Sprintf("cookie=%s, priority=300, table=2, %s, %s_dst=%s, "+ - "actions=set_field:%s->eth_dst,output:%s", + "actions=set_field:%s->eth_dst,%s", nodetypes.DefaultOpenFlowCookie, ipPrefix, ipPrefix, clusterIP, - npw.ofm.getDefaultBridgeMAC().String(), defaultNetConfig.OfPortPatch)) + npw.ofm.getDefaultBridgeMAC().String(), outputActions)) if util.IsRouteAdvertisementsEnabled() { // if the network is advertised, then for the reply from kapi and dns services to go back @@ -1584,8 +1589,7 @@ func newNodePortWatcher( // on the OVS bridge in the host. These flows act only on the packets coming in from outside // of the node. 
If someone on the node is trying to access the NodePort service, those packets // will not be processed by the OpenFlow flows, so we need to add iptable rules that DNATs the - // NodePortIP:NodePort to ClusterServiceIP:Port. We don't need to do this while - // running on DPU or on DPU-Host. + // NodePortIP:NodePort to ClusterServiceIP:Port. We don't need to do this on DPU. if config.OvnKubeNode.Mode == types.NodeModeFull { if config.Gateway.Mode == config.GatewayModeLocal { if err := initLocalGatewayIPTables(); err != nil { diff --git a/go-controller/pkg/node/gateway_udn.go b/go-controller/pkg/node/gateway_udn.go index 9d3d0a75ac..a284dca0cd 100644 --- a/go-controller/pkg/node/gateway_udn.go +++ b/go-controller/pkg/node/gateway_udn.go @@ -125,13 +125,17 @@ func NewUserDefinedNetworkGateway(netInfo util.NetInfo, node *corev1.Node, nodeL return nil, fmt.Errorf("unable to dereference default node network controller gateway object") } - if gw.openflowManager == nil { + if config.OvnKubeNode.Mode != types.NodeModeDPUHost && gw.openflowManager == nil { return nil, fmt.Errorf("openflow manager has not been provided for network: %s", netInfo.GetNetworkName()) } - intfName := gw.openflowManager.defaultBridge.GetGatewayIface() + + intfName := gw.GetGatewayIface() + if intfName == "" { + return nil, fmt.Errorf("failed to get gateway interface for network: %s", netInfo.GetNetworkName()) + } link, err := util.GetNetLinkOps().LinkByName(intfName) if err != nil { - return nil, fmt.Errorf("unable to get link for %s, error: %v", intfName, err) + return nil, fmt.Errorf("unable to get link for gateway interface %s, error: %v", intfName, err) } return &UserDefinedNetworkGateway{ @@ -204,7 +208,7 @@ func (udng *UserDefinedNetworkGateway) addMarkChain() error { // AddNetwork will be responsible to create all plumbings // required by this UDN on the gateway side func (udng *UserDefinedNetworkGateway) AddNetwork() error { - if udng.openflowManager == nil { + if config.OvnKubeNode.Mode != types.NodeModeDPUHost && udng.openflowManager == nil { return fmt.Errorf("openflow manager has not been provided for network: %s", udng.NetInfo.GetNetworkName()) } // port is created first and its MAC address configured. 
The IP(s) on that link are added after enslaving to a VRF device (addUDNManagementPortIPs) @@ -214,81 +218,94 @@ func (udng *UserDefinedNetworkGateway) AddNetwork() error { if err != nil { return fmt.Errorf("could not create management port netdevice for network %s: %w", udng.GetNetworkName(), err) } - vrfDeviceName := util.GetNetworkVRFName(udng.NetInfo) - routes, err := udng.computeRoutesForUDN(mplink) - if err != nil { - return fmt.Errorf("failed to compute routes for network %s, err: %v", udng.GetNetworkName(), err) - } - if err = udng.vrfManager.AddVRF(vrfDeviceName, mplink.Attrs().Name, uint32(udng.vrfTableId), nil); err != nil { - return fmt.Errorf("could not add VRF %d for network %s, err: %v", udng.vrfTableId, udng.GetNetworkName(), err) - } - if err = udng.addUDNManagementPortIPs(mplink); err != nil { - return fmt.Errorf("unable to add management port IP(s) for link %s, for network %s: %w", mplink.Attrs().Name, udng.GetNetworkName(), err) - } - if err = udng.vrfManager.AddVRFRoutes(vrfDeviceName, routes); err != nil { - return fmt.Errorf("could not add VRF %s routes for network %s, err: %v", vrfDeviceName, udng.GetNetworkName(), err) + + if config.OvnKubeNode.Mode != types.NodeModeDPU { + vrfDeviceName := util.GetNetworkVRFName(udng.NetInfo) + routes, err := udng.computeRoutesForUDN(mplink) + if err != nil { + return fmt.Errorf("failed to compute routes for network %s, err: %v", udng.GetNetworkName(), err) + } + if err = udng.vrfManager.AddVRF(vrfDeviceName, mplink.Attrs().Name, uint32(udng.vrfTableId), nil); err != nil { + return fmt.Errorf("could not add VRF %d for network %s, err: %v", udng.vrfTableId, udng.GetNetworkName(), err) + } + if err = udng.addUDNManagementPortIPs(mplink); err != nil { + return fmt.Errorf("unable to add management port IP(s) for link %s, for network %s: %w", mplink.Attrs().Name, udng.GetNetworkName(), err) + } + if err = udng.vrfManager.AddVRFRoutes(vrfDeviceName, routes); err != nil { + return fmt.Errorf("could not add VRF %s routes for network %s, err: %v", vrfDeviceName, udng.GetNetworkName(), err) + } } udng.updateAdvertisementStatus() - // create the iprules for this network - if err = udng.updateUDNVRFIPRules(); err != nil { - return fmt.Errorf("failed to update IP rules for network %s: %w", udng.GetNetworkName(), err) - } - - if err = udng.updateAdvertisedUDNIsolationRules(); err != nil { - return fmt.Errorf("failed to update isolation rules for network %s: %w", udng.GetNetworkName(), err) - } + if config.OvnKubeNode.Mode != types.NodeModeDPU { + // create the iprules for this network + if err = udng.updateUDNVRFIPRules(); err != nil { + return fmt.Errorf("failed to update IP rules for network %s: %w", udng.GetNetworkName(), err) + } - if err := udng.updateUDNVRFIPRoute(); err != nil { - return fmt.Errorf("failed to update ip routes for network %s: %w", udng.GetNetworkName(), err) - } + if err = udng.updateAdvertisedUDNIsolationRules(); err != nil { + return fmt.Errorf("failed to update isolation rules for network %s: %w", udng.GetNetworkName(), err) + } - // add loose mode for rp filter on management port - mgmtPortName := util.GetNetworkScopedK8sMgmtHostIntfName(uint(udng.GetNetworkID())) - if err := addRPFilterLooseModeForManagementPort(mgmtPortName); err != nil { - return fmt.Errorf("could not set loose mode for reverse path filtering on management port %s: %v", mgmtPortName, err) - } + if err := udng.updateUDNVRFIPRoute(); err != nil { + return fmt.Errorf("failed to update ip routes for network %s: %w", udng.GetNetworkName(), err) + } - 
nodeSubnets, err := udng.getLocalSubnets() - var mgmtIPs []*net.IPNet - for _, subnet := range nodeSubnets { - mgmtIPs = append(mgmtIPs, udng.GetNodeManagementIP(subnet)) - } - if err != nil { - return fmt.Errorf("failed to get node subnets for network %s: %w", udng.GetNetworkName(), err) - } - if err = udng.openflowManager.addNetwork(udng.NetInfo, nodeSubnets, mgmtIPs, udng.masqCTMark, udng.pktMark, udng.v6MasqIPs, udng.v4MasqIPs); err != nil { - return fmt.Errorf("could not add network %s: %v", udng.GetNetworkName(), err) + // add loose mode for rp filter on management port + mgmtPortName := util.GetNetworkScopedK8sMgmtHostIntfName(uint(udng.GetNetworkID())) + if err := addRPFilterLooseModeForManagementPort(mgmtPortName); err != nil { + return fmt.Errorf("could not set loose mode for reverse path filtering on management port %s: %v", mgmtPortName, err) + } } - waiter := newStartupWaiterWithTimeout(waitForPatchPortTimeout) - readyFunc := func() (bool, error) { - if err := udng.openflowManager.defaultBridge.SetNetworkOfPatchPort(udng.GetNetworkName()); err != nil { - klog.V(3).Infof("Failed to set network %s's openflow ports for default bridge; error: %v", udng.GetNetworkName(), err) - return false, nil + if config.OvnKubeNode.Mode != types.NodeModeDPUHost { + nodeSubnets, err := udng.getLocalSubnets() + if err != nil { + return fmt.Errorf("failed to get node subnets for network %s: %w", udng.GetNetworkName(), err) } - if udng.openflowManager.externalGatewayBridge != nil { - if err := udng.openflowManager.externalGatewayBridge.SetNetworkOfPatchPort(udng.GetNetworkName()); err != nil { - klog.V(3).Infof("Failed to set network %s's openflow ports for secondary bridge; error: %v", udng.GetNetworkName(), err) + var mgmtIPs []*net.IPNet + for _, subnet := range nodeSubnets { + mgmtIPs = append(mgmtIPs, udng.GetNodeManagementIP(subnet)) + } + if err = udng.openflowManager.addNetwork(udng.NetInfo, nodeSubnets, mgmtIPs, udng.masqCTMark, udng.pktMark, udng.v6MasqIPs, udng.v4MasqIPs); err != nil { + return fmt.Errorf("could not add network %s: %v", udng.GetNetworkName(), err) + } + + waiter := newStartupWaiterWithTimeout(waitForPatchPortTimeout) + readyFunc := func() (bool, error) { + if err := udng.openflowManager.defaultBridge.SetNetworkOfPatchPort(udng.GetNetworkName()); err != nil { + klog.V(3).Infof("Failed to set network %s's openflow ports for default bridge; error: %v", udng.GetNetworkName(), err) return false, nil } + if udng.openflowManager.externalGatewayBridge != nil { + if err := udng.openflowManager.externalGatewayBridge.SetNetworkOfPatchPort(udng.GetNetworkName()); err != nil { + klog.V(3).Infof("Failed to set network %s's openflow ports for secondary bridge; error: %v", udng.GetNetworkName(), err) + return false, nil + } + } + return true, nil } - return true, nil - } - postFunc := func() error { + postFunc := func() error { + if err := udng.gateway.Reconcile(); err != nil { + return fmt.Errorf("failed to reconcile flows on bridge for network %s; error: %v", udng.GetNetworkName(), err) + } + return nil + } + waiter.AddWait(readyFunc, postFunc) + if err := waiter.Wait(); err != nil { + return err + } + } else { if err := udng.gateway.Reconcile(); err != nil { return fmt.Errorf("failed to reconcile flows on bridge for network %s; error: %v", udng.GetNetworkName(), err) } - return nil - } - waiter.AddWait(readyFunc, postFunc) - if err := waiter.Wait(); err != nil { - return err } - if err := udng.addMarkChain(); err != nil { - return fmt.Errorf("failed to add the service masquerade 
chain: %w", err) + if config.OvnKubeNode.Mode != types.NodeModeDPU { + if err := udng.addMarkChain(); err != nil { + return fmt.Errorf("failed to add the service masquerade chain: %w", err) + } } // run gateway reconciliation loop on network configuration changes @@ -305,33 +322,41 @@ func (udng *UserDefinedNetworkGateway) GetNetworkRuleMetadata() string { // the gateway side. It's considered invalid to call this instance after // DelNetwork has returned succesfully. func (udng *UserDefinedNetworkGateway) DelNetwork() error { - vrfDeviceName := util.GetNetworkVRFName(udng.NetInfo) - // delete the iprules for this network - if err := udng.ruleManager.DeleteWithMetadata(udng.GetNetworkRuleMetadata()); err != nil { - return fmt.Errorf("unable to delete iprules for network %s, err: %v", udng.GetNetworkName(), err) + if config.OvnKubeNode.Mode != types.NodeModeDPU { + vrfDeviceName := util.GetNetworkVRFName(udng.NetInfo) + // delete the iprules for this network + if err := udng.ruleManager.DeleteWithMetadata(udng.GetNetworkRuleMetadata()); err != nil { + return fmt.Errorf("unable to delete iprules for network %s, err: %v", udng.GetNetworkName(), err) + } + // delete the VRF device for this network + if err := udng.vrfManager.DeleteVRF(vrfDeviceName); err != nil { + return err + } } - // delete the VRF device for this network - if err := udng.vrfManager.DeleteVRF(vrfDeviceName); err != nil { - return err + if config.OvnKubeNode.Mode != types.NodeModeDPUHost { + // delete the openflows for this network + if udng.openflowManager != nil { + udng.openflowManager.delNetwork(udng.NetInfo) + } } - // delete the openflows for this network - if udng.openflowManager != nil { - udng.openflowManager.delNetwork(udng.NetInfo) + if udng.openflowManager != nil || config.OvnKubeNode.Mode == types.NodeModeDPUHost { if err := udng.gateway.Reconcile(); err != nil { return fmt.Errorf("failed to reconcile default gateway for network %s, err: %v", udng.GetNetworkName(), err) } } - err := udng.deleteAdvertisedUDNIsolationRules() - if err != nil { - return fmt.Errorf("failed to remove advertised UDN isolation rules for network %s: %w", udng.GetNetworkName(), err) - } + if config.OvnKubeNode.Mode != types.NodeModeDPU { + err := udng.deleteAdvertisedUDNIsolationRules() + if err != nil { + return fmt.Errorf("failed to remove advertised UDN isolation rules for network %s: %w", udng.GetNetworkName(), err) + } - if err := udng.delMarkChain(); err != nil { - return err + if err := udng.delMarkChain(); err != nil { + return err + } } // delete the management port interface for this network - err = udng.deleteUDNManagementPort() + err := udng.deleteUDNManagementPort() if err != nil { return err } @@ -795,40 +820,50 @@ func (udng *UserDefinedNetworkGateway) Reconcile() { func (udng *UserDefinedNetworkGateway) doReconcile() error { klog.Infof("Reconciling gateway with updates for UDN %s", udng.GetNetworkName()) - // shouldn't happen - if udng.openflowManager == nil || udng.openflowManager.defaultBridge == nil { - return fmt.Errorf("openflow manager with default bridge configuration has not been provided for network %s", udng.GetNetworkName()) + if config.OvnKubeNode.Mode != types.NodeModeDPUHost { + // shouldn't happen + if udng.openflowManager == nil || udng.openflowManager.defaultBridge == nil { + return fmt.Errorf("openflow manager with default bridge configuration has not been provided for network %s", udng.GetNetworkName()) + } } udng.updateAdvertisementStatus() - // update bridge configuration - netConfig := 
udng.openflowManager.defaultBridge.GetNetworkConfig(udng.GetNetworkName()) - if netConfig == nil { - return fmt.Errorf("missing bridge configuration for network %s", udng.GetNetworkName()) + if config.OvnKubeNode.Mode != types.NodeModeDPUHost { + // update bridge configuration + netConfig := udng.openflowManager.defaultBridge.GetNetworkConfig(udng.GetNetworkName()) + if netConfig == nil { + return fmt.Errorf("missing bridge configuration for network %s", udng.GetNetworkName()) + } + netConfig.Advertised.Store(udng.isNetworkAdvertised) } - netConfig.Advertised.Store(udng.isNetworkAdvertised) - if err := udng.updateUDNVRFIPRules(); err != nil { - return fmt.Errorf("error while updating ip rule for UDN %s: %s", udng.GetNetworkName(), err) - } + if config.OvnKubeNode.Mode != types.NodeModeDPU { + if err := udng.updateUDNVRFIPRules(); err != nil { + return fmt.Errorf("error while updating ip rule for UDN %s: %s", udng.GetNetworkName(), err) + } - if err := udng.updateUDNVRFIPRoute(); err != nil { - return fmt.Errorf("error while updating ip route for UDN %s: %s", udng.GetNetworkName(), err) + if err := udng.updateUDNVRFIPRoute(); err != nil { + return fmt.Errorf("error while updating ip route for UDN %s: %s", udng.GetNetworkName(), err) + } } - // add below OpenFlows based on the gateway mode and whether the network is advertised or not: - // table=1, n_packets=0, n_bytes=0, priority=16,ip,nw_dst=128.192.0.2 actions=LOCAL (Both gateway modes) - // table=1, n_packets=0, n_bytes=0, priority=15,ip,nw_dst=128.192.0.0/14 actions=output:3 (shared gateway mode) - // necessary service isolation flows based on whether network is advertised or not - if err := udng.openflowManager.updateBridgeFlowCache(udng.nodeIPManager.ListAddresses()); err != nil { - return fmt.Errorf("error while updating logical flow for UDN %s: %s", udng.GetNetworkName(), err) + if config.OvnKubeNode.Mode != types.NodeModeDPUHost { + // add below OpenFlows based on the gateway mode and whether the network is advertised or not: + // table=1, n_packets=0, n_bytes=0, priority=16,ip,nw_dst=128.192.0.2 actions=LOCAL (Both gateway modes) + // table=1, n_packets=0, n_bytes=0, priority=15,ip,nw_dst=128.192.0.0/14 actions=output:3 (shared gateway mode) + // necessary service isolation flows based on whether network is advertised or not + if err := udng.openflowManager.updateBridgeFlowCache(udng.nodeIPManager.ListAddresses()); err != nil { + return fmt.Errorf("error while updating logical flow for UDN %s: %s", udng.GetNetworkName(), err) + } + // let's sync these flows immediately + udng.openflowManager.requestFlowSync() } - // let's sync these flows immediately - udng.openflowManager.requestFlowSync() - if err := udng.updateAdvertisedUDNIsolationRules(); err != nil { - return fmt.Errorf("error while updating advertised UDN isolation rules for network %s: %w", udng.GetNetworkName(), err) + if config.OvnKubeNode.Mode != types.NodeModeDPU { + if err := udng.updateAdvertisedUDNIsolationRules(); err != nil { + return fmt.Errorf("error while updating advertised UDN isolation rules for network %s: %w", udng.GetNetworkName(), err) + } } return nil } diff --git a/go-controller/pkg/ovn/base_network_controller_pods.go b/go-controller/pkg/ovn/base_network_controller_pods.go index e3abb1550c..91c092c208 100644 --- a/go-controller/pkg/ovn/base_network_controller_pods.go +++ b/go-controller/pkg/ovn/base_network_controller_pods.go @@ -1032,22 +1032,17 @@ func (bnc *BaseNetworkController) allocatesPodAnnotation() bool { func (bnc *BaseNetworkController) 
shouldReleaseDeletedPod(pod *corev1.Pod, switchName, nad string, podIfAddrs []*net.IPNet) (bool, error) { var err error - var isMigratedSourcePodStale bool - if !bnc.IsUserDefinedNetwork() { - isMigratedSourcePodStale, err = kubevirt.IsMigratedSourcePodStale(bnc.watchFactory, pod) + if !bnc.IsUserDefinedNetwork() && kubevirt.IsPodLiveMigratable(pod) { + allVMPodsAreCompleted, err := kubevirt.AllVMPodsAreCompleted(bnc.watchFactory, pod) if err != nil { return false, err } - } - - // Removing the the kubevirt stale pods should not de allocate the IPs - // to ensure that new pods do not take them - if isMigratedSourcePodStale { - return false, nil - } - if !util.PodCompleted(pod) { - return true, nil + // Removing the kubevirt stale pods should not deallocate the IPs + // to ensure that new pods do not take them + if !allVMPodsAreCompleted { + return false, nil + } } if bnc.wasPodReleasedBeforeStartup(string(pod.UID), nad) { @@ -1059,16 +1054,21 @@ func (bnc *BaseNetworkController) shouldReleaseDeletedPod(pod *corev1.Pod, switc return false, nil } - shouldReleasePodIPs := func() (bool, error) { - // If this pod applies to live migration it could have migrated so get the - // correct node name corresponding with the subnet. If the subnet is not - // tracked within the zone, nodeName will be empty which will force - // canReleasePodIPs to lookup all nodes. - nodeName := pod.Spec.NodeName - if !bnc.IsUserDefinedNetwork() && kubevirt.IsPodLiveMigratable(pod) { - nodeName, _ = bnc.lsManager.GetSubnetName(podIfAddrs) - } + // If this pod applies to live migration, it could have migrated within the + // zone, so get the correct node name corresponding to the subnet. If the + // subnet is not tracked within the zone, nodeName will be empty which + // will force a lookup for all nodes.
+ zoneOwnsSubnet := true + nodeName := pod.Spec.NodeName + if !bnc.IsUserDefinedNetwork() && kubevirt.IsPodLiveMigratable(pod) { + switchName, zoneOwnsSubnet = bnc.lsManager.GetSubnetName(podIfAddrs) + } + if !zoneOwnsSubnet { + // force shouldReleasePodIPs to search all nodes + nodeName = "" + } + shouldReleasePodIPs := func() (bool, error) { shouldRelease, err := bnc.canReleasePodIPs(podIfAddrs, nodeName) if err != nil { return false, err @@ -1082,7 +1082,7 @@ func (bnc *BaseNetworkController) shouldReleaseDeletedPod(pod *corev1.Pod, switc // if other pods are using the same IPs just in case we are processing // events in different order than cluster manager did (best effort, there // can still be issues with this) - if !bnc.allocatesPodAnnotation() { + if !bnc.allocatesPodAnnotation() || !zoneOwnsSubnet { shouldRelease, err = shouldReleasePodIPs() } else { shouldRelease, err = bnc.lsManager.ConditionalIPRelease(switchName, podIfAddrs, shouldReleasePodIPs) diff --git a/go-controller/pkg/ovn/kubevirt_test.go b/go-controller/pkg/ovn/kubevirt_test.go index 061b9fc5f6..c311ad657b 100644 --- a/go-controller/pkg/ovn/kubevirt_test.go +++ b/go-controller/pkg/ovn/kubevirt_test.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "net" + "slices" "strings" "time" @@ -20,6 +21,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kubevirt" libovsdbops "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/ops" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/libovsdb" ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -41,6 +43,8 @@ var _ = Describe("OVN Kubevirt Operations", func() { dnsServiceIPv6 = "fd7b:6b4d:7b25:d22f::3" clusterCIDRIPv4 = "10.128.0.0/16" clusterCIDRIPv6 = "fe00::/64" + subnetSuffixIPv4 = "/24" + subnetSuffixIPv6 = "/64" ) type testDHCPOptions struct { cidr string @@ -104,6 +108,8 @@ var _ = Describe("OVN Kubevirt Operations", func() { subnetIPv6 string transitSwitchPortIPv4 string transitSwitchPortIPv6 string + addressIPv4 string + addressIPv6 string } type testVM struct { @@ -124,6 +130,8 @@ var _ = Describe("OVN Kubevirt Operations", func() { lrpNetworkIPv6: "fd98::4/64", transitSwitchPortIPv4: "100.65.0.4/24", transitSwitchPortIPv6: "fd13::4/64", + addressIPv4: "10.89.0.1/24", + addressIPv6: "fc00:f853:ccd:e793::1/64", }, node2: { nodeID: "5", @@ -133,6 +141,8 @@ var _ = Describe("OVN Kubevirt Operations", func() { lrpNetworkIPv6: "fd98::5/64", transitSwitchPortIPv4: "100.65.0.5/24", transitSwitchPortIPv6: "fd13::5/64", + addressIPv4: "10.89.0.2/24", + addressIPv6: "fc00:f853:ccd:e793::2/64", }, node3: { nodeID: "6", @@ -142,6 +152,8 @@ var _ = Describe("OVN Kubevirt Operations", func() { lrpNetworkIPv6: "fd98::6/64", transitSwitchPortIPv4: "100.65.0.6/24", transitSwitchPortIPv6: "fd13::6/64", + addressIPv4: "10.89.0.3/24", + addressIPv6: "fc00:f853:ccd:e793::3/64", }, } vmByName = map[string]testVM{ @@ -245,6 +257,27 @@ var _ = Describe("OVN Kubevirt Operations", func() { return append(previousData, data...) 
} + filterOutStaleVirtLauncherExpectedTestData = func(namespace, name string, previousData []libovsdb.TestData) []libovsdb.TestData { + var data []libovsdb.TestData + lspUUID := util.GetLogicalPortName(namespace, name) + "-UUID" + for _, d := range previousData { + switch model := d.(type) { + case *nbdb.LogicalSwitch: + lsp := *model + lsp.Ports = slices.Clone(lsp.Ports) + lsp.Ports = slices.DeleteFunc(lsp.Ports, func(port string) bool { return port == lspUUID }) + d = &lsp + case *nbdb.LogicalSwitchPort: + if model.UUID == lspUUID { + continue + } + } + data = append(data, d) + + } + return data + } + newPodFromTestVirtLauncherPod = func(t testVirtLauncherPod) *corev1.Pod { if t.podName == "" { return nil @@ -342,8 +375,35 @@ var _ = Describe("OVN Kubevirt Operations", func() { ExternalIDs: ids, } } + composeNats = func(pod testVirtLauncherPod) ([]string, []*nbdb.NAT) { + var ids []string + var nats []*nbdb.NAT + if config.IPv4Mode { + id := pod.podName + "-IPv4-NAD-UUID" + nats = append(nats, &nbdb.NAT{ + UUID: id, + LogicalIP: pod.addressIPv4, + ExternalIP: testing.MustParseIPNet(nodeByName[pod.nodeName].addressIPv4).IP.String(), + Options: map[string]string{"stateless": "false"}, + Type: "snat", + }) + ids = append(ids, id) + } + if config.IPv6Mode { + id := pod.podName + "-IPv6-NAD-UUID" + nats = append(nats, &nbdb.NAT{ + UUID: id, + LogicalIP: pod.addressIPv6, + ExternalIP: testing.MustParseIPNet(nodeByName[pod.nodeName].addressIPv6).IP.String(), + Options: map[string]string{"stateless": "false"}, + Type: "snat", + }) + ids = append(ids, id) + } + return ids, nats + } - expectedNBDBAfterCleanup = func(expectedStaticRoutes []*nbdb.LogicalRouterStaticRoute) []libovsdb.TestData { + expectedNBDBAfterCleanup = func(expectedStaticRoutes []*nbdb.LogicalRouterStaticRoute, expectedNATs map[string][]*nbdb.NAT) []libovsdb.TestData { data := []libovsdb.TestData{} expectedPoliciesAfterCleanup := []string{} expectedStaticRoutesAfterCleanup := []string{} @@ -366,6 +426,11 @@ var _ = Describe("OVN Kubevirt Operations", func() { continue } else if lr, ok := nbData.(*nbdb.LogicalRouter); ok && lr.Name == ovntypes.OVNClusterRouter { expectedOvnClusterRouterAfterCleanup = lr + } else if lr, ok := nbData.(*nbdb.LogicalRouter); ok && expectedNATs[lr.Name] != nil { + for _, nat := range expectedNATs[lr.Name] { + lr.Nat = append(lr.Nat, nat.UUID) + data = append(data, nat) + } } data = append(data, nbData) } @@ -487,6 +552,7 @@ var _ = Describe("OVN Kubevirt Operations", func() { // To skip port group not found error config.EnableMulticast = false + config.Gateway.DisableSNATMultipleGWs = true fakeOvn = NewFakeOVN(true) }) @@ -671,6 +737,8 @@ var _ = Describe("OVN Kubevirt Operations", func() { Annotations: map[string]string{ "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf(`{"ipv4": %q, "ipv6": %q}`, nodeByName[node1].transitSwitchPortIPv4, nodeByName[node1].transitSwitchPortIPv6), "k8s.ovn.org/node-subnets": fmt.Sprintf(`{"default":[%q,%q]}`, nodeByName[node1].subnetIPv4, nodeByName[node1].subnetIPv6), + "k8s.ovn.org/l3-gateway-config": fmt.Sprintf(`{"default": {"mode": "local", "mac-address":"7e:57:f8:f0:3c:51", "ip-addresses":[%q, %q]}}`, nodeByName[node1].addressIPv4, nodeByName[node1].addressIPv6), + "k8s.ovn.org/node-chassis-id": "1", util.OvnNodeID: nodeByName[node1].nodeID, }, }, @@ -681,6 +749,8 @@ var _ = Describe("OVN Kubevirt Operations", func() { Annotations: map[string]string{ "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf(`{"ipv4": %q, "ipv6": %q}`, 
nodeByName[node2].transitSwitchPortIPv4, nodeByName[node2].transitSwitchPortIPv6), "k8s.ovn.org/node-subnets": fmt.Sprintf(`{"default":[%q,%q]}`, nodeByName[node2].subnetIPv4, nodeByName[node2].subnetIPv6), + "k8s.ovn.org/l3-gateway-config": fmt.Sprintf(`{"default": {"mode": "local", "mac-address":"7e:57:f8:f0:3c:52", "ip-addresses":[%q, %q]}}`, nodeByName[node2].addressIPv4, nodeByName[node2].addressIPv6), + "k8s.ovn.org/node-chassis-id": "2", util.OvnNodeID: nodeByName[node2].nodeID, }, }, @@ -691,6 +761,8 @@ var _ = Describe("OVN Kubevirt Operations", func() { Annotations: map[string]string{ "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf(`{"ipv4": %q, "ipv6": %q}`, nodeByName[node3].transitSwitchPortIPv4, nodeByName[node3].transitSwitchPortIPv6), "k8s.ovn.org/node-subnets": fmt.Sprintf(`{"default":[%q,%q]}`, nodeByName[node3].subnetIPv4, nodeByName[node3].subnetIPv6), + "k8s.ovn.org/l3-gateway-config": fmt.Sprintf(`{"default": {"mode": "local", "mac-address":"7e:57:f8:f0:3c:53", "ip-addresses":[%q, %q]}}`, nodeByName[node3].addressIPv4, nodeByName[node3].addressIPv6), + "k8s.ovn.org/node-chassis-id": "3", util.OvnNodeID: nodeByName[node3].nodeID, }, }, @@ -749,6 +821,7 @@ var _ = Describe("OVN Kubevirt Operations", func() { } expectedOVN := []libovsdb.TestData{} + ovnClusterRouter.Policies = []string{} expectedOVNClusterRouter := ovnClusterRouter.DeepCopy() expectedOVNClusterRouter.Policies = []string{} @@ -781,23 +854,38 @@ var _ = Describe("OVN Kubevirt Operations", func() { expectedOVN = append(expectedOVN, ComposeDHCPv6Options(dhcpv6OptionsUUID+d.hostname, t.namespace, &d)) } expectedSourceLSRP := migrationSourceLSRP.DeepCopy() + expectedGWRouter := gwRouter.DeepCopy() expectedOVN = append(expectedOVN, expectedOVNClusterRouter, - gwRouter, + expectedGWRouter, logicalRouterPort, expectedSourceLSRP, ) expectedOVN = kubevirtOVNTestData(t, expectedOVN) + var expectedMigrationTargetGWRouter *nbdb.LogicalRouter if t.migrationTarget.nodeName != "" { expectedTargetLSRP := migrationTargetLSRP.DeepCopy() + expectedMigrationTargetGWRouter = migrationTargetGWRouter.DeepCopy() expectedOVN = append(expectedOVN, migrationTargetLRP, expectedTargetLSRP, - migrationTargetGWRouter, + expectedMigrationTargetGWRouter, ) } + + for router, testpod := range map[*nbdb.LogicalRouter]testVirtLauncherPod{expectedGWRouter: t.testVirtLauncherPod, expectedMigrationTargetGWRouter: t.migrationTarget.testVirtLauncherPod} { + if _, isLocal := fakeOvn.controller.localZoneNodes.Load(testpod.nodeName); isLocal && router != nil && testpod.podName != "" { + natIDs, nats := composeNats(testpod) + router.Nat = append(router.Nat, natIDs...) + for _, nat := range nats { + expectedOVN = append(expectedOVN, nat) + } + } + } + Eventually(fakeOvn.nbClient).Should(libovsdb.HaveData(expectedOVN), "should populate ovn") + if t.replaceNode != "" { By("Replace vm node with newNode at the logical switch manager") newNode := &corev1.Node{ @@ -830,26 +918,87 @@ var _ = Describe("OVN Kubevirt Operations", func() { Expect(err).ToNot(HaveOccurred()) } - if t.podName != "" { - pod, err := fakeOvn.fakeClient.KubeClient.CoreV1().Pods(t.namespace).Get(context.TODO(), t.podName, metav1.GetOptions{}) + var vmIPs []string + if t.addressIPv4 != "" { + vmIPs = append(vmIPs, t.addressIPv4+subnetSuffixIPv4) + } + if t.addressIPv6 != "" { + vmIPs = append(vmIPs, t.addressIPv6+subnetSuffixIPv6) + } + vmIPNets := testing.MustParseIPNets(vmIPs...) 
+ subnet, checkRelease := fakeOvn.controller.lsManager.GetSubnetName(vmIPNets) + + if t.podName == "" { + return nil + } + + completeAndDeletePod := func(namespace, name string) { + GinkgoHelper() + pod, err := fakeOvn.fakeClient.KubeClient.CoreV1().Pods(namespace).Get(context.TODO(), name, metav1.GetOptions{}) Expect(err).NotTo(HaveOccurred()) pod.Status.Phase = corev1.PodSucceeded - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(t.namespace).UpdateStatus(context.TODO(), pod, metav1.UpdateOptions{}) + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(namespace).UpdateStatus(context.TODO(), pod, metav1.UpdateOptions{}) Expect(err).NotTo(HaveOccurred()) - err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(t.namespace).Delete(context.TODO(), t.podName, metav1.DeleteOptions{}) + err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(namespace).Delete(context.TODO(), name, metav1.DeleteOptions{}) Expect(err).NotTo(HaveOccurred()) + } - if t.migrationTarget.nodeName != "" { - pod, err := fakeOvn.fakeClient.KubeClient.CoreV1().Pods(t.namespace).Get(context.TODO(), t.migrationTarget.podName, metav1.GetOptions{}) - Expect(err).NotTo(HaveOccurred()) - pod.Status.Phase = corev1.PodSucceeded - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(t.namespace).UpdateStatus(context.TODO(), pod, metav1.UpdateOptions{}) - Expect(err).NotTo(HaveOccurred()) - err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(t.namespace).Delete(context.TODO(), t.migrationTarget.podName, metav1.DeleteOptions{}) - Expect(err).NotTo(HaveOccurred()) + deleteFirst := t.testVirtLauncherPod + deleteSecond := t.migrationTarget.testVirtLauncherPod + deleteFirstRouter := gwRouter + hasMigration := t.migrationTarget.nodeName != "" + hasUnsuccesfulMigration := hasMigration && t.migrationTarget.updatePhase != nil + if hasUnsuccesfulMigration { + deleteFirst = t.migrationTarget.testVirtLauncherPod + deleteFirstRouter = migrationTargetGWRouter + deleteSecond = t.testVirtLauncherPod + } + + completeAndDeletePod(t.namespace, deleteFirst.podName) + if !hasMigration { + if checkRelease { + Eventually(fakeOvn.controller.lsManager.AllocateIPs). + WithArguments(subnet, vmIPNets). + Should(Succeed(), "should have de-allocated VM IP after termination") } - Eventually(fakeOvn.nbClient).Should(libovsdb.HaveData(expectedNBDBAfterCleanup(expectedStaticRoutes)), "should cleanup ovn") + Eventually(fakeOvn.nbClient).Should( + libovsdb.HaveData(expectedNBDBAfterCleanup(expectedStaticRoutes, nil)), + "should cleanup terminated pod data from ovn", + ) + return nil } + if checkRelease { + Consistently(fakeOvn.controller.lsManager.AllocateIPs). + WithArguments(subnet, vmIPNets). + ShouldNot(Succeed(), "should have not de-allocated VM IP after migration") + } + + Eventually(fakeOvn.nbClient).Should( + libovsdb.HaveData(filterOutStaleVirtLauncherExpectedTestData(t.namespace, deleteFirst.podName, expectedOVN)), + "should cleanup source pod data from ovn", + ) + + completeAndDeletePod(t.namespace, deleteSecond.podName) + if checkRelease { + Eventually(fakeOvn.controller.lsManager.AllocateIPs). + WithArguments(subnet, vmIPNets). + Should(Succeed(), "should have de-allocated target VM IP after termination") + } + + // FIXME: for some reason we don't remove stale NATs of migrated + // VMs. One possible reason is if VMs can migrate within the + // same node and we can race between creation and deletion. Can + // it happen? 
+ // https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5627 + expectedNATs := map[string][]*nbdb.NAT{} + if _, isLocal := fakeOvn.controller.localZoneNodes.Load(deleteFirst.nodeName); isLocal { + _, nats := composeNats(deleteFirst) + expectedNATs[deleteFirstRouter.Name] = nats + } + Eventually(fakeOvn.nbClient).Should( + libovsdb.HaveData(expectedNBDBAfterCleanup(expectedStaticRoutes, expectedNATs)), + "should cleanup terminated target pod data from ovn", + ) return nil } @@ -1261,7 +1410,18 @@ var _ = Describe("OVN Kubevirt Operations", func() { zone: kubevirt.OvnRemoteZone, }, }, - testVirtLauncherPod: virtLauncher2(node1, vm1), + testVirtLauncherPod: testVirtLauncherPod{ + suffix: "1", + testPod: testPod{ + nodeName: node2, + }, + vmName: vm1, + skipPodAnnotations: false, /* add ovn pod annotation */ + }, + migrationTarget: testMigrationTarget{ + lrpNetworks: []string{nodeByName[node1].lrpNetworkIPv4, nodeByName[node1].lrpNetworkIPv6}, + testVirtLauncherPod: virtLauncher2(node1, vm1), + }, expectedDhcpv4: []testDHCPOptions{{ cidr: nodeByName[node1].subnetIPv4, dns: dnsServiceIPv4, diff --git a/go-controller/pkg/ovn/ovn.go b/go-controller/pkg/ovn/ovn.go index d935bca85f..280e41eba3 100644 --- a/go-controller/pkg/ovn/ovn.go +++ b/go-controller/pkg/ovn/ovn.go @@ -335,15 +335,27 @@ func (oc *DefaultNetworkController) removeRemoteZonePod(pod *corev1.Pod) error { return fmt.Errorf("failed to remove the remote zone pod: %w", err) } + // FIXME: there are other things we are probably leaving behind and should + // be removed for completed VMs, like per-pod SNAT. Also + // removeRemoteZonePodFromNamespaceAddressSet above should probably not be + // called for migrations. + // https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5627 if kubevirt.IsPodLiveMigratable(pod) { - ips, err := util.GetPodCIDRsWithFullMask(pod, oc.GetNetInfo()) - if err != nil && !errors.Is(err, util.ErrNoPodIPFound) { - return fmt.Errorf("failed to get pod ips for the pod %s/%s: %w", pod.Namespace, pod.Name, err) + allVMPodsAreCompleted, err := kubevirt.AllVMPodsAreCompleted(oc.watchFactory, pod) + if err != nil { + return err } - switchName, zoneContainsPodSubnet := kubevirt.ZoneContainsPodSubnet(oc.lsManager, ips) - if zoneContainsPodSubnet { - if err := oc.lsManager.ReleaseIPs(switchName, ips); err != nil { - return err + + if allVMPodsAreCompleted { + ips, err := util.GetPodCIDRsWithFullMask(pod, oc.GetNetInfo()) + if err != nil && !errors.Is(err, util.ErrNoPodIPFound) { + return fmt.Errorf("failed to get pod ips for the pod %s/%s: %w", pod.Namespace, pod.Name, err) + } + switchName, zoneContainsPodSubnet := kubevirt.ZoneContainsPodSubnet(oc.lsManager, ips) + if zoneContainsPodSubnet { + if err := oc.lsManager.ReleaseIPs(switchName, ips); err != nil { + return err + } } } } diff --git a/go-controller/pkg/ovn/pods.go b/go-controller/pkg/ovn/pods.go index 0ad9442e3e..80c431ef13 100644 --- a/go-controller/pkg/ovn/pods.go +++ b/go-controller/pkg/ovn/pods.go @@ -211,6 +211,23 @@ func (oc *DefaultNetworkController) deleteLogicalPort(pod *corev1.Pod, portInfo return fmt.Errorf("cannot delete GW Routes for pod %s: %w", podDesc, err) } + if kubevirt.IsPodLiveMigratable(pod) { + switchName, hasLocalIPs := oc.lsManager.GetSubnetName(pInfo.ips) + // don't attempt to release IPs that are not managed by this zone which can + // happen with live migratable pods, otherwise we would get distracting + // error logs on release + if !hasLocalIPs { + klog.V(5).Infof("Inhibiting release of live migratable pod %s/%s IPs %s not 
managed by this zone", + pod.Namespace, pod.Name, + util.JoinIPNetIPs(pInfo.ips, " "), + ) + return nil + } + // a pod might have migrated from one node to another node in the same + // zone so fix the switch for which the release needs to happen + pInfo.logicalSwitch = switchName + } + // Releasing IPs needs to happen last so that we can deterministically know that if delete failed that // the IP of the pod needs to be released. Otherwise we could have a completed pod failed to be removed // and we dont know if the IP was released or not, and subsequently could accidentally release the IP diff --git a/go-controller/pkg/util/egressip/net.go b/go-controller/pkg/util/egressip/net.go new file mode 100644 index 0000000000..018e6d27f6 --- /dev/null +++ b/go-controller/pkg/util/egressip/net.go @@ -0,0 +1,39 @@ +package egressip + +import ( + "math" + "net" + + "github.com/vishvananda/netlink" + "golang.org/x/sys/unix" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" +) + +// GetNetlinkAddress returns a netlink address configured with specific +// egress ip parameters +func GetNetlinkAddress(ip net.IP, ifindex int) *netlink.Addr { + return &netlink.Addr{ + IPNet: &net.IPNet{IP: ip, Mask: util.GetIPFullMask(ip)}, + Flags: getNetlinkAddressFlag(ip), + Scope: int(netlink.SCOPE_UNIVERSE), + ValidLft: getNetlinkAddressValidLft(ip), + LinkIndex: ifindex, + } +} + +func getNetlinkAddressFlag(ip net.IP) int { + // isV6? + if ip != nil && ip.To4() == nil && ip.To16() != nil { + return unix.IFA_F_NODAD + } + return 0 +} + +func getNetlinkAddressValidLft(ip net.IP) int { + // isV6? + if ip != nil && ip.To4() == nil && ip.To16() != nil { + return math.MaxUint32 + } + return 0 +} diff --git a/helm/ovn-kubernetes/values-multi-node-zone.yaml b/helm/ovn-kubernetes/values-multi-node-zone.yaml index 8056461256..e3d1c33db7 100644 --- a/helm/ovn-kubernetes/values-multi-node-zone.yaml +++ b/helm/ovn-kubernetes/values-multi-node-zone.yaml @@ -14,7 +14,7 @@ tags: # -- Endpoint of Kubernetes api server k8sAPIServer: https://172.25.0.2:6443 -# -- IP range for Kubernetes pods, /14 is the top level range, under which each /23 range will be assigned to a node +# -- IP range for Kubernetes pods, /16 is the top level range, under which each /24 range will be assigned to a node podNetwork: 10.244.0.0/16/24 # -- A comma-separated set of CIDR notation IP ranges from which k8s assigns service cluster IPs. This should be the same as the value provided for kube-apiserver "--service-cluster-ip-range" option serviceNetwork: 10.96.0.0/16 diff --git a/helm/ovn-kubernetes/values-no-ic.yaml b/helm/ovn-kubernetes/values-no-ic.yaml index f643f81133..a02d849cd9 100644 --- a/helm/ovn-kubernetes/values-no-ic.yaml +++ b/helm/ovn-kubernetes/values-no-ic.yaml @@ -12,7 +12,7 @@ tags: # -- Endpoint of Kubernetes api server k8sAPIServer: https://172.25.0.2:6443 -# -- IP range for Kubernetes pods, /14 is the top level range, under which each /23 range will be assigned to a node +# -- IP range for Kubernetes pods, /16 is the top level range, under which each /24 range will be assigned to a node podNetwork: 10.244.0.0/16/24 # -- A comma-separated set of CIDR notation IP ranges from which k8s assigns service cluster IPs. 
This should be the same as the value provided for kube-apiserver "--service-cluster-ip-range" option serviceNetwork: 10.96.0.0/16 diff --git a/helm/ovn-kubernetes/values-single-node-zone.yaml b/helm/ovn-kubernetes/values-single-node-zone.yaml index 516b77220b..3787ad6823 100644 --- a/helm/ovn-kubernetes/values-single-node-zone.yaml +++ b/helm/ovn-kubernetes/values-single-node-zone.yaml @@ -14,7 +14,7 @@ tags: # -- Endpoint of Kubernetes api server k8sAPIServer: https://172.25.0.2:6443 -# -- IP range for Kubernetes pods, /14 is the top level range, under which each /23 range will be assigned to a node +# -- IP range for Kubernetes pods, /16 is the top level range, under which each /24 range will be assigned to a node podNetwork: 10.244.0.0/16/24 # -- A comma-separated set of CIDR notation IP ranges from which k8s assigns service cluster IPs. This should be the same as the value provided for kube-apiserver "--service-cluster-ip-range" option serviceNetwork: 10.96.0.0/16 diff --git a/mkdocs.yml b/mkdocs.yml index 45f5a277e0..925f2f96d6 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -155,5 +155,7 @@ nav: - Preconfigured UDN Addresses: okeps/okep-5233-preconfigured-udn-addresses.md - BGP: okeps/okep-5296-bgp.md - Layer2TransitRouter: okeps/okep-5094-layer2-transit-router.md + - MCP for Troubleshooting: okeps/okep-5494-ovn-kubernetes-mcp-server.md + - Dynamic UDN Node Allocation: okeps/okep-5552-dynamic-udn-node-allocation.md - Blog: - blog/index.md diff --git a/test/e2e/kubevirt.go b/test/e2e/kubevirt.go index 2dfaf5e2e2..c545a1959c 100644 --- a/test/e2e/kubevirt.go +++ b/test/e2e/kubevirt.go @@ -998,7 +998,7 @@ var _ = Describe("Kubevirt Virtual Machines", feature.VirtualMachineSupport, fun "kubevirt.io/allow-pod-bridge-network-live-migration": "", } nodeSelector := map[string]string{ - namespace: "", + namespace: "true", } networkSource := kubevirtv1.NetworkSource{ Pod: &kubevirtv1.PodNetwork{}, @@ -1119,7 +1119,7 @@ passwd: by(vm.Name, "Live migrate for the third time to the node owning the subnet") // Patch back the original node with the label and remove it // from the rest of nodes to force live migration target to it. 
- e2enode.AddOrUpdateLabelOnNode(fr.ClientSet, originalNode, namespace, "") + e2enode.AddOrUpdateLabelOnNode(fr.ClientSet, originalNode, namespace, "true") for _, selectedNode := range selectedNodes { if selectedNode.Name != originalNode { e2enode.RemoveLabelOffNode(fr.ClientSet, selectedNode.Name, namespace) @@ -1405,7 +1405,7 @@ fi // configure VM nodeSelector with it and live migration will take only // them into consideration for _, node := range selectedNodes { - e2enode.AddOrUpdateLabelOnNode(fr.ClientSet, node.Name, namespace, "") + e2enode.AddOrUpdateLabelOnNode(fr.ClientSet, node.Name, namespace, "true") } prepareHTTPServerPods(map[string]string{}, checkPodHasIPAtStatus) diff --git a/test/e2e/network_segmentation_preconfigured_layer2.go b/test/e2e/network_segmentation_preconfigured_layer2.go index 5f7a32c85c..ea360fa38e 100644 --- a/test/e2e/network_segmentation_preconfigured_layer2.go +++ b/test/e2e/network_segmentation_preconfigured_layer2.go @@ -121,7 +121,7 @@ var _ = Describe("Network Segmentation: Preconfigured Layer2 UDN", feature.Netwo role: "primary", defaultGatewayIPs: joinStrings("172.31.0.10", "2014:100:200::100"), reservedCIDRs: joinStrings("172.31.1.0/24", "2014:100:200::/122"), - infrastructureCIDRs: joinStrings("172.31.0.10/30", "2014:100:200::100/122"), + infrastructureCIDRs: joinStrings("172.31.0.8/30", "2014:100:200::100/122"), }, expectedGatewayIPs: []string{"172.31.0.10", "2014:100:200::100"}, }), @@ -137,6 +137,77 @@ var _ = Describe("Network Segmentation: Preconfigured Layer2 UDN", feature.Netwo }), ) + type invalidAPITestConfig struct { + netConfig *networkAttachmentConfigParams + expectedError interface{} + } + DescribeTable("unmasked reserved / infrastructure subnets are not allowed", + func(config invalidAPITestConfig) { + podIPs := filterCIDRs(f.ClientSet, config.netConfig.cidr) + if len(podIPs) == 0 { + Skip("IP family not supported in this environment") + } + + By("creating the L2 network") + netConfig := config.netConfig + + netConfig.namespace = f.Namespace.Name + + udnManifest := generateUserDefinedNetworkManifest(netConfig, f.ClientSet) + cleanup, err := createManifest(netConfig.namespace, udnManifest) + Expect(err).To(MatchError(config.expectedError)) + DeferCleanup(cleanup) + }, + Entry("Layer2 with unmasked IPv4 reserved subnets", invalidAPITestConfig{ + netConfig: &networkAttachmentConfigParams{ + name: "invalid-l2-net-reserved-subnets", + topology: "layer2", + cidr: "172.31.0.0/16", + role: "primary", + reservedCIDRs: "172.31.0.10/30", + }, + expectedError: ContainSubstring( + "Invalid value: \"object\": reservedSubnets must be a masked network address (no host bits set)", + ), + }), + Entry("Layer2 with unmasked IPv6 reserved subnets", invalidAPITestConfig{ + netConfig: &networkAttachmentConfigParams{ + name: "invalid-l2-net-reserved-subnets", + topology: "layer2", + cidr: "2014:100:200::0/60", + role: "primary", + reservedCIDRs: "2014:100:200::88/122", + }, + expectedError: ContainSubstring( + "Invalid value: \"object\": reservedSubnets must be a masked network address (no host bits set)", + ), + }), + Entry("Layer2 with unmasked IPv4 infrastructure subnets", invalidAPITestConfig{ + netConfig: &networkAttachmentConfigParams{ + name: "invalid-l2-net-infra-subnets", + topology: "layer2", + cidr: "172.31.0.0/16", + role: "primary", + infrastructureCIDRs: "172.31.0.10/30", + }, + expectedError: ContainSubstring( + "Invalid value: \"object\": infrastructureSubnets must be a masked network address (no host bits set)", + ), + }), + Entry("Layer2 
with unmasked IPv6 infrastructure subnets", invalidAPITestConfig{ + netConfig: &networkAttachmentConfigParams{ + name: "invalid-l2-net-infra-subnets", + topology: "layer2", + cidr: "2014:100:200::0/60", + role: "primary", + infrastructureCIDRs: "2014:100:200::88/122", + }, + expectedError: ContainSubstring( + "Invalid value: \"object\": infrastructureSubnets must be a masked network address (no host bits set)", + ), + }), + ) + Context("duplicate IP validation with primary UDN layer 2 pods", func() { const ( duplicateIPv4 = "10.128.0.200/16"