diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d9d5d40eec..cd0a35e4c3 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -467,7 +467,7 @@ jobs: - {"target": "node-ip-mac-migration", "ha": "noHA", "gateway-mode": "shared", "ipfamily": "ipv6", "disable-snat-multiple-gws": "SnatGW", "second-bridge": "1br", "ic": "ic-disabled"} - {"target": "node-ip-mac-migration", "ha": "noHA", "gateway-mode": "shared", "ipfamily": "ipv4", "disable-snat-multiple-gws": "SnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones"} - {"target": "compact-mode", "ha": "noHA", "gateway-mode": "local", "ipfamily": "ipv4", "disable-snat-multiple-gws": "snatGW", "second-bridge": "1br", "ic": "ic-disabled"} - - {"target": "multi-homing", "ha": "noHA", "gateway-mode": "local", "ipfamily": "dualstack", "disable-snat-multiple-gws": "SnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones"} + - {"target": "multi-homing", "ha": "noHA", "gateway-mode": "local", "ipfamily": "dualstack", "disable-snat-multiple-gws": "SnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "network-segmentation": "enable-network-segmentation"} - {"target": "multi-node-zones", "ha": "noHA", "gateway-mode": "local", "ipfamily": "ipv4", "disable-snat-multiple-gws": "SnatGW", "second-bridge": "1br", "ic": "ic-multi-node-zones", "num-workers": "3", "num-nodes-per-zone": "2"} - {"target": "external-gateway", "ha": "noHA", "gateway-mode": "shared", "ipfamily": "ipv4", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "2br", "ic": "ic-single-node-zones"} - {"target": "external-gateway", "ha": "noHA", "gateway-mode": "local", "ipfamily": "ipv4", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones"} @@ -513,6 +513,7 @@ jobs: TRAFFIC_FLOW_TESTS: "${{ matrix.traffic-flow-tests }}" ENABLE_ROUTE_ADVERTISEMENTS: "${{ matrix.routeadvertisements != '' }}" ADVERTISE_DEFAULT_NETWORK: "${{ matrix.routeadvertisements == 'advertise-default' }}" + ENABLE_PRE_CONF_UDN_ADDR: "${{ ( ( matrix.target == 'multi-homing' && matrix.network-segmentation == 'enable-network-segmentation' ) || matrix.target == 'kv-live-migration' ) && matrix.ic == 'ic-single-node-zones' }}" steps: - name: Install VRF kernel module diff --git a/MEETINGS.md b/MEETINGS.md index 701459788f..8964025628 100644 --- a/MEETINGS.md +++ b/MEETINGS.md @@ -6,7 +6,7 @@ All are welcome to join our meetings! If you want to discuss something with the ## Meeting time -We meet alternate Monday's at 6:00 PM CET/CEST. +We meet alternate Monday's at 5:00 PM CET/CEST. In order to figure out when our next meeting is, please check our agenda for previous meeting history. The meetings last up to 1 hour. 
diff --git a/contrib/kind.sh b/contrib/kind.sh index af1c0f537c..8fbdbbebdf 100755 --- a/contrib/kind.sh +++ b/contrib/kind.sh @@ -858,9 +858,6 @@ build_ovn_image() { if [ "$OVN_IMAGE" == local ]; then set_ovn_image - # Build binaries - make -C ${DIR}/../go-controller - # Build image make -C ${DIR}/../dist/images IMAGE="${OVN_IMAGE}" OVN_REPO="${OVN_REPO}" OVN_GITREF="${OVN_GITREF}" OCI_BIN="${OCI_BIN}" fedora-image diff --git a/dist/images/Dockerfile.fedora b/dist/images/Dockerfile.fedora index 4ca51e888f..fb656a2a6c 100644 --- a/dist/images/Dockerfile.fedora +++ b/dist/images/Dockerfile.fedora @@ -93,7 +93,7 @@ RUN echo "Running on $BUILDPLATFORM, building for $TARGETPLATFORM" # Final stage RUN dnf install --best --refresh -y --setopt=tsflags=nodocs koji -RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] || [ -z "$TARGETPLATFORM"] ; then koji download-build $ovnver --arch=x86_64 ; \ +RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] || [ -z "$TARGETPLATFORM" ] ; then koji download-build $ovnver --arch=x86_64 ; \ else koji download-build $ovnver --arch=aarch64 ; fi ###################################### diff --git a/dist/images/Dockerfile.ubuntu b/dist/images/Dockerfile.ubuntu index 10addc57d4..7fedefa624 100644 --- a/dist/images/Dockerfile.ubuntu +++ b/dist/images/Dockerfile.ubuntu @@ -8,14 +8,12 @@ # # So this file will change over time. -FROM ubuntu:24.10 +FROM ubuntu:25.04 USER root RUN apt-get update && apt-get install -y iproute2 curl software-properties-common util-linux nftables -RUN curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - - # Install OVS and OVN packages. RUN apt-get update && apt-get install -y openvswitch-switch openvswitch-common ovn-central ovn-common ovn-host diff --git a/dist/images/Dockerfile.ubuntu.arm64 b/dist/images/Dockerfile.ubuntu.arm64 index 48a408b036..3830641cf0 100644 --- a/dist/images/Dockerfile.ubuntu.arm64 +++ b/dist/images/Dockerfile.ubuntu.arm64 @@ -8,14 +8,12 @@ # # So this file will change over time. -FROM ubuntu:24.10 +FROM ubuntu:25.04 USER root RUN apt-get update && apt-get install -y iproute2 curl software-properties-common util-linux nftables -RUN curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - - # Install OVS and OVN packages. RUN apt-get update && apt-get install -y openvswitch-switch openvswitch-common ovn-central ovn-common ovn-host diff --git a/docs/design/topology.md b/docs/design/topology.md index 36dc78b3e1..d3353a4487 100644 --- a/docs/design/topology.md +++ b/docs/design/topology.md @@ -66,7 +66,3 @@ It is distributed across the nodes in the cluster and is responsible for routing traffic between the different zones. FIXME: This page is lazily written, there is so much more to do here. - -## References - -* https://docs.google.com/presentation/d/1BtkYAO30gI3v6ah2hS6XTGtt6JBHNRHh64vhGEtfLEM/edit#slide=id.gfb215b3717_0_3299 \ No newline at end of file diff --git a/docs/features/multiple-networks/mirrored-endpointslices.md b/docs/features/multiple-networks/mirrored-endpointslices.md deleted file mode 100644 index 39f8615779..0000000000 --- a/docs/features/multiple-networks/mirrored-endpointslices.md +++ /dev/null @@ -1,135 +0,0 @@ -# EndpointSlices mirror controller for User-Defined Networks - -## Summary - -Pods that use a [user-defined network](https://github.com/trozet/enhancements/blob/multiple_networks/enhancements/network/user-defined-network-segmentation.md) as their primary network will still have the cluster default network IP in their status. 
For services this results in the EndpointSlices providing the IPs of the cluster default network in the Kubernetes API. To enable services support for primary user-defined networks, the EndpointSlices mirror controller was introduced to create custom EndpointSlices with user-defined network IP addresses extracted from OVN-Kubernetes annotations. - -## Implementation - -The introduced controller duplicates the default EndpointSlices, creating new copies that include IP addresses from primary user-defined network. It bypasses EndpointSlices in namespaces that do not have a user-defined primary network. The controller lacks specific logic for selecting endpoints, it only replicates those generated by the default controller and replaces the IP addresses. For host-networked pods, the controller retains the same IP addresses as the default controller. Custom EndpointSlices not created by the default controller are not processed. - -The default EndpointSlices controller creates objects that contain the following labels: - -- `endpointslice.kubernetes.io/managed-by:endpointslice-controller.k8s.io` - Indicates that the EndpointSlice is managed by the default Kubernetes EndpointSlice controller. -- `kubernetes.io/service-name:` - The service that this EndpointSlice belongs to, used by the default network service controller. - -The EndpointSlices mirror controller uses a separate set of labels: - -- `endpointslice.kubernetes.io/managed-by:endpointslice-mirror-controller.k8s.ovn.org` - Indicates that the EndpointSlice is managed by the mirror controller. -- `k8s.ovn.org/service-name:` - The service that this mirrored EndpointSlice belongs to, used by the user-defined network service controller. Note that the label key is different from the default EndpointSlice. -- `k8s.ovn.org/source-endpointslice-version:` - The last reconciled resource version from the default EndpointSlice. - -and annotations (Label values have a length limit of 63 characters): -- `k8s.ovn.org/endpointslice-network:` - The user-defined network that the IP addresses in the mirrored EndpointSlice belong to. -- `k8s.ovn.org/source-endpointslice:` - The name of the default EndpointSlice that was the source of the mirrored EndpointSlice. - - -### Example - -With the following NetworkAttachmentDefinition: - -```yaml -apiVersion: k8s.cni.cncf.io/v1 -kind: NetworkAttachmentDefinition -metadata: - name: l3-network - namespace: nad-l3 -spec: - config: |2 - { - "cniVersion": "1.0.0", - "name": "l3-network", - "type": "ovn-k8s-cni-overlay", - "topology":"layer3", - "subnets": "10.128.0.0/16/24", - "mtu": 1300, - "netAttachDefName": "nad-l3/l3-network", - "role": "primary" - } -``` - -We can observe the following EndpointSlices created for a one-replica deployment exposed through a `sample-deployment` service: - - - - - - - - - -
Default EndpointSliceMirrored EndpointSlice
- -```yaml -kind: EndpointSlice -apiVersion: discovery.k8s.io/v1 -metadata: - name: sample-deployment-rkk4n - generateName: sample-deployment- - generation: 1 - labels: - app: l3pod - endpointslice.kubernetes.io/managed-by: endpointslice-controller.k8s.io - kubernetes.io/service-name: sample-deployment - name: sample-deployment-rkk4n - namespace: nad-l3 - resourceVersion: "31533" -addressType: IPv4 -endpoints: -- addresses: - - 10.244.1.17 - conditions: - ready: true - serving: true - terminating: false - nodeName: ovn-worker - targetRef: - kind: Pod - name: sample-deployment-6b64bd4868-7ftt6 - namespace: nad-l3 - uid: 6eb5d05c-cff4-467d-bc1b-890443750463 -ports: -- name: "" - port: 80 - protocol: TCP -``` - - - -```yaml -kind: EndpointSlice -apiVersion: discovery.k8s.io/v1 -metadata: - name: l3-network-sample-deployment-hgkmw - generateName: l3-network-sample-deployment- - labels: - endpointslice.kubernetes.io/managed-by: endpointslice-mirror-controller.k8s.ovn.org - k8s.ovn.org/service-name: sample-deployment - k8s.ovn.org/source-endpointslice-version: "31533" - annotations: - k8s.ovn.org/endpointslice-network: l3-network - k8s.ovn.org/source-endpointslice: sample-deployment-rkk4n - namespace: nad-l3 - resourceVersion: "31535" -addressType: IPv4 -endpoints: -- addresses: - - 10.128.1.3 - conditions: - ready: true - serving: true - terminating: false - nodeName: ovn-worker - targetRef: - kind: Pod - name: sample-deployment-6b64bd4868-7ftt6 - namespace: nad-l3 - uid: 6eb5d05c-cff4-467d-bc1b-890443750463 -ports: -- name: "" - port: 80 - protocol: TCP - -``` - -
diff --git a/docs/features/user-defined-networks/images/KubeletHealthchecks-Part1.png b/docs/features/user-defined-networks/images/KubeletHealthchecks-Part1.png new file mode 100644 index 0000000000..4ca1233bc7 Binary files /dev/null and b/docs/features/user-defined-networks/images/KubeletHealthchecks-Part1.png differ diff --git a/docs/features/user-defined-networks/images/KubeletHealthchecks-Part2.png b/docs/features/user-defined-networks/images/KubeletHealthchecks-Part2.png new file mode 100644 index 0000000000..8426824bd0 Binary files /dev/null and b/docs/features/user-defined-networks/images/KubeletHealthchecks-Part2.png differ diff --git a/docs/features/user-defined-networks/images/L2DeepDive-2segments.png b/docs/features/user-defined-networks/images/L2DeepDive-2segments.png new file mode 100644 index 0000000000..b04dda3f11 Binary files /dev/null and b/docs/features/user-defined-networks/images/L2DeepDive-2segments.png differ diff --git a/docs/features/user-defined-networks/images/L3DeepDive.png b/docs/features/user-defined-networks/images/L3DeepDive.png new file mode 100644 index 0000000000..c28fa0a93c Binary files /dev/null and b/docs/features/user-defined-networks/images/L3DeepDive.png differ diff --git a/docs/features/user-defined-networks/images/Layer2VMMigration.png b/docs/features/user-defined-networks/images/Layer2VMMigration.png new file mode 100644 index 0000000000..992e9637d7 Binary files /dev/null and b/docs/features/user-defined-networks/images/Layer2VMMigration.png differ diff --git a/docs/features/user-defined-networks/images/localnet-topology.png b/docs/features/user-defined-networks/images/localnet-topology.png new file mode 100644 index 0000000000..9d3782ca51 Binary files /dev/null and b/docs/features/user-defined-networks/images/localnet-topology.png differ diff --git a/docs/features/user-defined-networks/images/native-namespace-isolation.png b/docs/features/user-defined-networks/images/native-namespace-isolation.png new file mode 100644 index 0000000000..adeb18d565 Binary files /dev/null and b/docs/features/user-defined-networks/images/native-namespace-isolation.png differ diff --git a/docs/features/user-defined-networks/images/overlappingpodIPs.png b/docs/features/user-defined-networks/images/overlappingpodIPs.png new file mode 100644 index 0000000000..be29b67092 Binary files /dev/null and b/docs/features/user-defined-networks/images/overlappingpodIPs.png differ diff --git a/docs/features/user-defined-networks/images/tenant-isolation-lighter.png b/docs/features/user-defined-networks/images/tenant-isolation-lighter.png new file mode 100644 index 0000000000..11cfb812bb Binary files /dev/null and b/docs/features/user-defined-networks/images/tenant-isolation-lighter.png differ diff --git a/docs/features/user-defined-networks/user-defined-networks.md b/docs/features/user-defined-networks/user-defined-networks.md new file mode 100644 index 0000000000..1b0393bd5e --- /dev/null +++ b/docs/features/user-defined-networks/user-defined-networks.md @@ -0,0 +1,692 @@ +# User Defined Networks + +## Introduction + +User Defined Networks (UDNs) in OVN-Kubernetes offer flexible network configurations +for users, going beyond the traditional single default network model for all pods +within a Kubernetes cluster. This feature addresses the diverse and advanced networking +requirements of various applications and use cases. 
+
+## Motivation
+
+Traditional Kubernetes networking, which typically connects all pods to a default Layer3 network,
+lacks the necessary flexibility for many modern use cases and advanced network capabilities.
+UDNs provide several key advantages:
+
+* **Workload/Tenant Isolation**: UDNs enable the grouping of different application
+types into isolated networks within the cluster, preventing communication between them.
+* **Flexible Network Topologies**: Users can create different types of overlay networks
+that suit their use cases and then attach their workloads to these networks, which are
+then isolated natively.
+* **Overlapping Pod IPs**: UDNs allow the creation of multiple networks within a cluster that
+can use the same IP address ranges for pods, expanding deployment scenarios.
+
+See the [enhancement] for more details.
+
+[enhancement]: https://ovn-kubernetes.io/okeps/okep-5193-user-defined-networks/
+
+### User-Stories/Use-Cases
+
+See the [user-stories] defined in the enhancement.
+
+[user-stories]: https://ovn-kubernetes.io/okeps/okep-5193-user-defined-networks/#user-storiesuse-cases
+
+The two main user stories are:
+
+#### Native Namespace Isolation using Networks
+
+![namespace-isolation](images/native-namespace-isolation.png)
+Here the blue, green, purple and yellow networks within those
+namespaces cannot reach each other and hence provide native isolation
+to the workloads in those networks from workloads in other networks.
+
+#### Native Tenant Isolation using Networks
+
+![tenant-isolation](images/tenant-isolation-lighter.png)
+Here the tenants BERLIN and MUNICH are isolated from each other.
+So the workloads in the four namespaces belonging to BERLIN - purple,
+yellow, green and blue - can talk to each other, but they can't talk to
+the workloads belonging to the MUNICH tenant across the namespaces
+brown, cyan, orange and violet.
+
+There are more user stories, which will be covered in the sections below
+with appropriate diagrams.
+
+## How to enable this feature on an OVN-Kubernetes cluster?
+
+This feature is enabled by default on all OVN-Kubernetes clusters.
+You don't need to do anything extra to start using this feature.
+There is a feature config option `--enable-network-segmentation` under
+the `OVNKubernetesFeatureConfig` config that can be used to disable this
+feature. However, note that disabling the feature will not remove
+existing CRs in the cluster. This feature has to be enabled along with
+the multiple-networks flag `--enable-multi-network`, since UDNs
+use Network Attachment Definitions as an underlying implementation
+detail and reuse the secondary network controllers.
+
+## Workflow Description
+
+A tenant consists of one or more namespaces in a cluster. Network segmentation
+can be achieved by attaching one or more namespaces to the same network; those
+namespaces are then not reachable from other namespaces in the cluster that are
+not part of that network.
+
+## Implementation Details
+
+### User facing API Changes
+
+The implementation of UDNs introduces two new Custom Resource Definitions (CRDs)
+for network creation:
+
+* Namespace-scoped **UserDefinedNetwork** (UDN): This CRD is for tenant owners,
+allowing them to create networks within their namespace. This provides isolation
+for their namespaces from other tenants' namespaces.
+ +* Cluster-scoped **ClusterUserDefinedNetwork** (CUDN): This CRD provides cluster +administrators with the ability to allow multiple namespaces to be part of the +same network that is then isolated from other networks. + +**NOTE**: For a namespace to be considered for UDN creation, it must be +labeled with `k8s.ovn.org/primary-user-defined-network` at the time of its +creation. This label cannot be updated later, and if absent, the namespace +will not be considered for UDN creation. + +See the [api-specification-docs] for information on each of the fields + +[api-specification-docs]: https://ovn-kubernetes.io/api-reference/userdefinednetwork-api-spec/ + +### OVN-Kubernetes Implementation Details + +`UserDefinedNetworks` is an opinionated implementation +of multi-networking in Kubernetes. There are two types of +UserDefinedNetworks: + +* `Primary`: Also known as P-UDN -> Primary UserDefinedNetwork: This means the + network will act as the primary network for the pod and all default traffic + will pass through this network except for Kubelet healthchecks which still uses + the default cluster-wide network as Kubernetes is not multi-networking aware. +* `Secondary`: Also known as S-UDN -> Secondary UserDefinedNetwork: This means the + network will act as only a secondary network for the pod and only pod traffic + that is part of the secondary network may be routed through this interface. These + types of networks have existed for a long time usually created using + `NetworkAttachmentDefinitions` API but are now more standardised using UDN CRs. + +OVN-Kubernetes currently doesn't support north-south traffic for +secondary networks and none of the core Kubernetes features like Services will work there. +Primary networks on the other hand has full support for all features as present +on cluster default network. + +UDNs can have flexible virtual network topologies to suit the use cases +of end users. Currently supported topology types for a given network include: + +**Layer3 Networks** + +`Layer3`: is a topology type wherein the pods or VMs are connected to their +node’s local router and all these routers are then connected to the distributed +switch across nodes. + * Each pod would hence get an IP from the node's subnet segment + * When in doubt which topology to use go with layer3 which is the same topology + as the cluster default network + * Can be of type `primary` or `secondary` + +Let's see how a Layer3 Network looks on the OVN layer. + +![l3-UDN](images/L3DeepDive.png) + +Here we can see a blue and green P-UDN. On node1, pod1 is part of green UDN and +pod2 is part of blue UDN. They each have a udn-0 interface that is attached to +the UDN network and a eth0 interface that is attached to the cluster default +network (grey color) which is only used for kubelet healthchecks. + +**Layer2 Networks** + +`Layer2`: is a topology type wherein the pods or VMs are all connected to the +same layer2 flat switch. + * Usually used when the applications deployed expect a layer2 type network + connection (Perhaps applications want a single broadcast domain, latency sensitive, use proprietary L2 protocols) + * Common in Virtualization world for seamless migration of the VM since + persistent IPs of the VMs can be preserved across nodes in your cluster + during live migration + * Can be of type `primary` or `secondary` + +![l2-UDN](images/L2DeepDive-2segments.png) + +Here we can see a blue and green P-UDN. On node1, pod1 is part of green UDN and +pod2 is part of blue UDN. 
They each have a udn-0 interface that is attached to
+the UDN network and an eth0 interface that is attached to the cluster default
+network (grey color) which is only used for kubelet healthchecks.
+
+**Localnet Networks**
+
+`Localnet`: is a topology type wherein the pods or VMs attached to a localnet
+network on the overlay can egress to the provider’s physical network
+ * without SNATing to nodeIPs… preserves the podIPs
+ * podIPs can be on the same subnet as the provider’s VLAN
+ * VLAN IDs can be used to mark the traffic coming from the localnet for
+   isolation on the provider network
+ * Can be of type `secondary`; it cannot be a `primary` network of a pod.
+ * Only `ClusterUserDefinedNetwork` supports `localnet`
+
+![localnet-UDN](images/localnet-topology.png)
+
+Here we can see blue and green S-UDN localnet networks.
+
+The ovnkube-cluster-manager component watches for these CRs and
+reacts by creating NADs under the hood. The ovnkube-controller watches for
+the NADs and creates the required OVN logical constructs in the OVN database.
+The ovnkube-node also adds the required gateway plumbing such as openflows and
+VRF tables and routes to provide networking to these networks.
+
+### Creating UserDefinedNetworks
+
+Now that we understand what a UDN is, let's get hands-on!
+
+Let's create two namespaces `blue` and `green`:
+
+```yaml
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: blue
+  labels:
+    name: blue
+    k8s.ovn.org/primary-user-defined-network: ""
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: green
+  labels:
+    name: green
+    k8s.ovn.org/primary-user-defined-network: ""
+```
+
+Sample API YAML for creating two `UserDefinedNetworks` of type `Layer3` in these namespaces:
+
+```yaml
+apiVersion: k8s.ovn.org/v1
+kind: UserDefinedNetwork
+metadata:
+  name: blue-network
+  namespace: blue
+  labels:
+    name: blue
+    purpose: kubecon-eu-2025-demo
+spec:
+  topology: Layer3
+  layer3:
+    role: Primary
+    subnets:
+    - cidr: 103.103.0.0/16
+      hostSubnet: 24
+---
+apiVersion: k8s.ovn.org/v1
+kind: UserDefinedNetwork
+metadata:
+  name: green-network
+  namespace: green
+  labels:
+    name: green
+    purpose: kubecon-eu-2025-demo
+spec:
+  topology: Layer3
+  layer3:
+    role: Primary
+    subnets:
+    - cidr: 203.203.0.0/16
+      hostSubnet: 24
+```
+
+### Inspecting a UDN Pod
+
+Now if you create pods in these two namespaces and try to ping one pod from
+the other, you will see that the connection won't work (see the sketch after
+the note below).
+
+```
+ $ k get pods -n blue -owide
+ NAME    READY   STATUS    RESTARTS   AGE   IP           NODE
+ blue    1/1     Running   0          9h    10.244.0.7   ovn-worker
+ blue1   1/1     Running   0          8h    10.244.1.4   ovn-worker2
+
+ $ k get pods -n green -owide
+ NAME    READY   STATUS    RESTARTS   AGE   IP           NODE
+ green   1/1     Running   0          9h    10.244.0.6   ovn-worker
+```
+
+NOTE: `kubectl get pods` and `kubectl describe pod` will both show the default network
+podIP, which is not to be confused with the UDN podIPs. Remember how we said
+Kubernetes is not multi-networking aware? Hence pod.Status.IPs will always
+be the IPs that kubelet is aware of for healthchecks to work.
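+
+As a concrete illustration of the isolation check mentioned above, here is a
+minimal sketch reusing the pod names from the listings. The target address is
+only a placeholder: it would be the green pod's primary-UDN IP, which is shown
+in the annotation in the next section.
+
+```
+# From the blue pod, try to reach the green pod's UDN address; since blue and
+# green are separate primary UDNs, this is expected to fail/time out.
+$ kubectl exec -n blue blue -- ping -c 3 -W 2 <green-pod-UDN-IP>
+```
+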
+ +In order to see the real UDN PodIPs, always do a describe on the pod and see +the following annotations on the pod: +``` +$ k get pod -n green green -oyaml +apiVersion: v1 +kind: Pod +metadata: + annotations: + k8s.ovn.org/pod-networks: '{"default":{"ip_addresses":["10.244.0.6/24"], + "mac_address":"0a:58:0a:f4:00:06","routes":[{"dest":"10.244.0.0/16", + "nextHop":"10.244.0.1"},{"dest":"100.64.0.0/16","nextHop":"10.244.0.1"}], + "ip_address":"10.244.0.6/24","role":"infrastructure-locked"}, + "green/green-network":{"ip_addresses":["203.203.2.5/24"], + "mac_address":"0a:58:c8:0a:02:05","gateway_ips":["203.203.2.1"], + "routes":[{"dest":"203.203.0.0/16","nextHop":"203.203.2.1"}, + {"dest":"10.96.0.0/16","nextHop":"203.203.2.1"},{"dest":"100.65.0.0/16", + "nextHop":"203.203.2.1"}],"ip_address":"203.203.2.5/24","gateway_ip":"203.203.2.1", + "role":"primary"},"green/green-secondary-network":{"ip_addresses":["100.10.1.7/24"], + "mac_address":"0a:58:64:0a:01:07","routes":[{"dest":"100.10.0.0/16", + "nextHop":"100.10.1.1"}],"ip_address":"100.10.1.7/24","role":"secondary"}}' +``` +The above shows the OVN-Kubernetes IPAM Annotation for each type of network: +* `default` which is the cluster-wide `infrastructure-locked` network only used + for Kubelet health checks and pod has IP 10.244.0.6 here +* `primary` which is the primary UDN for the pod through which all traffic + passes through and pod has IP 203.203.2.5. +* `secondary` which is the secondary UDN network for the pod from which pod has IP 100.10.1.7 + +One can also use the multus annotation to figure out the podIPs on each interface: + +``` +$ oc get pod -n green green -oyaml +apiVersion: v1 +kind: Pod +metadata: + annotations: + k8s.v1.cni.cncf.io/network-status: |- + [{ + "name": "ovn-kubernetes", + "interface": "eth0", + "ips": [ + "10.244.0.6" + ], + "mac": "0a:58:0a:f4:00:06", + "dns": {} + },{ + "name": "ovn-kubernetes", + "interface": "ovn-udn1", + "ips": [ + "200.203.2.5" + ], + "mac": "0a:58:c8:0a:02:05", + "default": true, + "dns": {} + },{ + "name": "green/green-secondary-network", + "interface": "net1", + "ips": [ + "100.10.1.7" + ], + "mac": "0a:58:64:0a:01:07", + "dns": {} + }] +``` + +### KubeletHealthChecks for UDN pods + +In each of the above diagrams we saw a grey network still attached to all +pods across all UDNs. This represents the cluster default network which +is `infrastructure-locked` for primary-UDN pods and is only used for healthchecks. + +We add UDN Isolation ACLs and cgroups NFTable rules on these pod ports so that +no traffic except healthcheck traffic from kubelet is allowed to reach these pods. 
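+
+For reference, these UDN isolation ACLs can be listed directly from the OVN
+northbound database. This is only a sketch, assuming kubectl access to an
+ovnkube pod that can reach the NB DB; the pod and container names below are
+illustrative and depend on how the cluster was deployed.
+
+```
+# List the ACLs whose owner type is UDNIsolation; the external_ids key matches
+# the records shown below.
+$ kubectl exec -n ovn-kubernetes <ovnkube-node-pod> -c nb-ovsdb -- \
+    ovn-nbctl find ACL 'external_ids:"k8s.ovn.org/owner-type"=UDNIsolation'
+```
+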
+ +Using OVN ACLs, we ensure only traffic from kubelet is allowed on the +default `eth0` interface of the pods: + +``` +_uuid : 1278b0f4-0a14-4637-9d05-83ba9df6ec03 +action : allow +direction : from-lport +external_ids : {direction=Egress, "k8s.ovn.org/id"="default-network-controller:UDNIsolation:AllowHostARPSecondary:Egress", "k8s.ovn.org/name"=AllowHostARPSecondary, "k8s.ovn.org/owner-controller"=default-network-controller, "k8s.ovn.org/owner-type"=UDNIsolation} +label : 0 +log : false +match : "inport == @a8747502060113802905 && (( arp && arp.tpa == 10.244.2.2 ) || ( nd && nd.target == fd00:10:244:3::2 ))" +meter : acl-logging +name : [] +options : {} +priority : 1001 +sample_est : [] +sample_new : [] +severity : [] +tier : 0 + +_uuid : 489ae95b-ae9d-47d0-bf1d-b2477a9ed6a2 +action : allow +direction : to-lport +external_ids : {direction=Ingress, "k8s.ovn.org/id"="default-network-controller:UDNIsolation:AllowHostARPSecondary:Ingress", "k8s.ovn.org/name"=AllowHostARPSecondary, "k8s.ovn.org/owner-controller"=default-network-controller, "k8s.ovn.org/owner-type"=UDNIsolation} +label : 0 +log : false +match : "outport == @a8747502060113802905 && (( arp && arp.spa == 10.244.2.2 ) || ( nd && nd.target == fd00:10:244:3::2 ))" +meter : acl-logging +name : [] +options : {} +priority : 1001 +sample_est : [] +sample_new : [] +severity : [] +tier : 0 + + +_uuid : 980be3e4-75af-45f7-bce3-3bb08ecd8b3a +action : drop +direction : to-lport +external_ids : {direction=Ingress, "k8s.ovn.org/id"="default-network-controller:UDNIsolation:DenySecondary:Ingress", "k8s.ovn.org/name"=DenySecondary, "k8s.ovn.org/owner-controller"=default-network-controller, "k8s.ovn.org/owner-type"=UDNIsolation} +label : 0 +log : false +match : "outport == @a8747502060113802905" +meter : acl-logging +name : [] +options : {} +priority : 1000 +sample_est : [] +sample_new : [] +severity : [] +tier : 0 + +_uuid : cca19dca-1fde-4a14-841d-7e2cce804de4 +action : drop +direction : from-lport +external_ids : {direction=Egress, "k8s.ovn.org/id"="default-network-controller:UDNIsolation:DenySecondary:Egress", "k8s.ovn.org/name"=DenySecondary, "k8s.ovn.org/owner-controller"=default-network-controller, "k8s.ovn.org/owner-type"=UDNIsolation} +label : 0 +log : false +match : "inport == @a8747502060113802905" +meter : acl-logging +name : [] +options : {} +priority : 1000 +sample_est : [] +sample_new : [] +severity : [] +tier : 0 +``` + +![kubelet-healthchecks-part1](images/KubeletHealthchecks-Part1.png) + +As you can see here a default network pod, `pod2` can't reach +the UDN pod `pod1` via its eth0 interface thanks to the ACLs in place. +So no traffic from the UDN pod ever leaves via `eth0`. The only traffic +that is allowed via `eth0` interface is the kubelet probe traffic. + +But given how we have allow ACLs for kubelet traffic, but this matches +on management portIP which is the hostIP, any process on the host can +potentially reach the UDN pods. In order to have more tighter security, +we have cgroups based NFT rules on the host to prevent any non-kubelet +process from being able to reach the default network `eth0` port on +UDN pods. + +![kubelet-healthchecks-part2](images/KubeletHealthchecks-Part2.png) + +These rules look like this: + +``` + chain udn-isolation { + comment "Host isolation for user defined networks" + type filter hook output priority filter; policy accept; + ip daddr . meta l4proto . 
th dport @udn-open-ports-v4 accept + ip daddr @udn-open-ports-icmp-v4 meta l4proto icmp accept + socket cgroupv2 level 2 475436 ip daddr @udn-pod-default-ips-v4 accept + ip daddr @udn-pod-default-ips-v4 drop + ip6 daddr . meta l4proto . th dport @udn-open-ports-v6 accept + ip6 daddr @udn-open-ports-icmp-v6 meta l4proto ipv6-icmp accept + socket cgroupv2 level 2 475436 ip6 daddr @udn-pod-default-ips-v6 accept + ip6 daddr @udn-pod-default-ips-v6 drop + } + + set udn-open-ports-v4 { + type ipv4_addr . inet_proto . inet_service + comment "default network open ports of pods in user defined networks (IPv4)" + } + + set udn-open-ports-v6 { + type ipv6_addr . inet_proto . inet_service + comment "default network open ports of pods in user defined networks (IPv6)" + } + + set udn-open-ports-icmp-v4 { + type ipv4_addr + comment "default network IPs of pods in user defined networks that allow ICMP (IPv4)" + } + + set udn-open-ports-icmp-v6 { + type ipv6_addr + comment "default network IPs of pods in user defined networks that allow ICMP (IPv6)" + } + + set udn-pod-default-ips-v4 { + type ipv4_addr + comment "default network IPs of pods in user defined networks (IPv4)" + } + + set udn-pod-default-ips-v6 { + type ipv6_addr + comment "default network IPs of pods in user defined networks (IPv6)" + } +``` + +The only exception to this is when users annotate +the UDN pod using the `open-default-ports` annotation: +``` +k8s.ovn.org/open-default-ports: | + - protocol: tcp + port: 80 + - protocol: udp + port: 53 +``` +which means we open up allow ACLs and nftrules to allow traffic +to reach at those ports. + +### Overlapping PodIPs + +Two networks can have the same subnet since they are completely +isolated. We use a `masqueradeIP` SNAT per UDN to avoid conntrack +collisions on the host. So traffic leaving each UDN is SNATed to +a unique IP before being sent to the host. + +![overlapping-podips](images/overlappingpodIPs.png) + +### VM LiveMigration and PersistentIPs over Layer2 UDNs + +Users can use the `layer2` topology when creating virtual machines +on OVN-Kubernetes and can easily live migrate the VMs across nodes +along with preserving their IPs. + +![overlapping-podips](images/Layer2VMMigration.png) + +### Services on UDNs + +Creating a service on UDNs is same as creating them on default +network, no extra plumbing is required. + +```yaml +apiVersion: v1 +kind: Service +metadata: + name: service-blue + namespace: blue + labels: + network: blue +spec: + type: LoadBalancer + selector: + network: blue + ports: + - name: web + port: 80 + targetPort: 8080 +``` +``` +$ k get svc -n blue +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +service-blue LoadBalancer 10.96.207.175 172.19.0.10 80:31372/TCP 5s +$ k get endpointslice -n blue +NAME ADDRESSTYPE PORTS ENDPOINTS AGE +service-blue-55d6c IPv4 8080 103.103.1.5,103.103.0.5 65s +service-blue-pkll7 IPv4 8080 10.244.0.3,10.244.1.8 66s +``` +One set of endpoints show the UDN ntework IPs of the pods and the other set +shows default network IPs. + +When the service is created inside the blue namespace, the +clusterIPs get automatically isolated from pods in other networks. +However nodeports, loadbalancerIPs and externalIPs can be reached +across UDNs. + +### EndpointSlices mirror controller for User-Defined Networks + +Pods that use a UDN as their primary network will still have the cluster +default network IP in their status. For services this results in the EndpointSlices +providing the IPs of the cluster default network in the Kubernetes API. 
To enable +services support for primary user-defined networks, the EndpointSlices mirror +controller was introduced to create custom EndpointSlices with user-defined +network IP addresses extracted from OVN-Kubernetes annotations. + +The introduced controller duplicates the default EndpointSlices, creating +new copies that include IP addresses from primary user-defined network. It +bypasses EndpointSlices in namespaces that do not have a user-defined primary +network. The controller lacks specific logic for selecting endpoints, it only +replicates those generated by the default controller and replaces the IP addresses. +For host-networked pods, the controller retains the same IP addresses as the +default controller. Custom EndpointSlices not created by the default controller +are not processed. + +The default EndpointSlices controller creates objects that contain the following labels: + +- `endpointslice.kubernetes.io/managed-by:endpointslice-controller.k8s.io` - Indicates + that the EndpointSlice is managed by the default Kubernetes EndpointSlice controller. +- `kubernetes.io/service-name:` - The service that this EndpointSlice + belongs to, used by the default network service controller. + +The EndpointSlices mirror controller uses a separate set of labels: + +- `endpointslice.kubernetes.io/managed-by:endpointslice-mirror-controller.k8s.ovn.org` - Indicates + that the EndpointSlice is managed by the mirror controller. +- `k8s.ovn.org/service-name:` - The service that this mirrored EndpointSlice + belongs to, used by the user-defined network service controller. Note that the label + key is different from the default EndpointSlice. +- `k8s.ovn.org/source-endpointslice-version:` - The + last reconciled resource version from the default EndpointSlice. + +and annotations (Label values have a length limit of 63 characters): +- `k8s.ovn.org/endpointslice-network:` - The user-defined network + that the IP addresses in the mirrored EndpointSlice belong to. +- `k8s.ovn.org/source-endpointslice:` - The name of the + default EndpointSlice that was the source of the mirrored EndpointSlice. + +Example: + +With the following NetworkAttachmentDefinition: + +```yaml +apiVersion: k8s.cni.cncf.io/v1 +kind: NetworkAttachmentDefinition +metadata: + name: l3-network + namespace: nad-l3 +spec: + config: |2 + { + "cniVersion": "1.0.0", + "name": "l3-network", + "type": "ovn-k8s-cni-overlay", + "topology":"layer3", + "subnets": "10.128.0.0/16/24", + "mtu": 1300, + "netAttachDefName": "nad-l3/l3-network", + "role": "primary" + } +``` + +We can observe the following EndpointSlices created for a one-replica deployment +exposed through a `sample-deployment` service: + + + + + + + + + +
Default EndpointSliceMirrored EndpointSlice
+ +```yaml +kind: EndpointSlice +apiVersion: discovery.k8s.io/v1 +metadata: + name: sample-deployment-rkk4n + generateName: sample-deployment- + generation: 1 + labels: + app: l3pod + endpointslice.kubernetes.io/managed-by: endpointslice-controller.k8s.io + kubernetes.io/service-name: sample-deployment + name: sample-deployment-rkk4n + namespace: nad-l3 + resourceVersion: "31533" +addressType: IPv4 +endpoints: +- addresses: + - 10.244.1.17 + conditions: + ready: true + serving: true + terminating: false + nodeName: ovn-worker + targetRef: + kind: Pod + name: sample-deployment-6b64bd4868-7ftt6 + namespace: nad-l3 + uid: 6eb5d05c-cff4-467d-bc1b-890443750463 +ports: +- name: "" + port: 80 + protocol: TCP +``` + + + +```yaml +kind: EndpointSlice +apiVersion: discovery.k8s.io/v1 +metadata: + name: l3-network-sample-deployment-hgkmw + generateName: l3-network-sample-deployment- + labels: + endpointslice.kubernetes.io/managed-by: endpointslice-mirror-controller.k8s.ovn.org + k8s.ovn.org/service-name: sample-deployment + k8s.ovn.org/source-endpointslice-version: "31533" + annotations: + k8s.ovn.org/endpointslice-network: l3-network + k8s.ovn.org/source-endpointslice: sample-deployment-rkk4n + namespace: nad-l3 + resourceVersion: "31535" +addressType: IPv4 +endpoints: +- addresses: + - 10.128.1.3 + conditions: + ready: true + serving: true + terminating: false + nodeName: ovn-worker + targetRef: + kind: Pod + name: sample-deployment-6b64bd4868-7ftt6 + namespace: nad-l3 + uid: 6eb5d05c-cff4-467d-bc1b-890443750463 +ports: +- name: "" + port: 80 + protocol: TCP + +``` + +
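+
+Because the mirrored objects carry their own labels, they can be queried
+separately from the default slices. A small sketch using the labels documented
+above and the `sample-deployment` example:
+
+```
+# Only the mirrored EndpointSlices for the service, selected via the
+# mirror-controller labels.
+$ kubectl get endpointslices -n nad-l3 \
+    -l endpointslice.kubernetes.io/managed-by=endpointslice-mirror-controller.k8s.ovn.org,k8s.ovn.org/service-name=sample-deployment
+```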
+ +That's how behind the scenes services on UDNs are implemented. + +## References + +* Use the workshop yamls [here](https://github.com/tssurya/kubecon-eu-2025-london-udn-workshop/tree/main/manifests) to play around diff --git a/go-controller/cmd/ovn-k8s-cni-overlay/ovn-k8s-cni-overlay.go b/go-controller/cmd/ovn-k8s-cni-overlay/ovn-k8s-cni-overlay.go index 621d1e01d1..88d94faeb5 100644 --- a/go-controller/cmd/ovn-k8s-cni-overlay/ovn-k8s-cni-overlay.go +++ b/go-controller/cmd/ovn-k8s-cni-overlay/ovn-k8s-cni-overlay.go @@ -20,10 +20,12 @@ func main() { p := cni.NewCNIPlugin("") c.Action = func(_ *cli.Context) error { - skel.PluginMain( - p.CmdAdd, - p.CmdCheck, - p.CmdDel, + skel.PluginMainFuncs( + skel.CNIFuncs{ + Add: p.CmdAdd, + Check: p.CmdCheck, + Del: p.CmdDel, + }, version.All, bv.BuildString("ovn-k8s-cni-overlay")) return nil diff --git a/go-controller/go.mod b/go-controller/go.mod index 72e89c3b7a..0a4e54196b 100644 --- a/go-controller/go.mod +++ b/go-controller/go.mod @@ -10,7 +10,7 @@ require ( github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d github.com/bhendo/go-powershell v0.0.0-20190719160123-219e7fb4e41e github.com/cenkalti/backoff/v4 v4.3.0 - github.com/containernetworking/cni v1.1.2 + github.com/containernetworking/cni v1.2.3 github.com/containernetworking/plugins v1.2.0 github.com/coreos/go-iptables v0.6.0 github.com/fsnotify/fsnotify v1.7.0 @@ -25,7 +25,7 @@ require ( github.com/k8snetworkplumbingwg/govdpa v0.1.5-0.20230926073613-07c1031aea47 github.com/k8snetworkplumbingwg/ipamclaims v0.5.0-alpha github.com/k8snetworkplumbingwg/multi-networkpolicy v1.0.1 - github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.6.0 + github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.7.7 github.com/k8snetworkplumbingwg/sriovnet v1.2.1-0.20230427090635-4929697df2dc github.com/mdlayher/arp v0.0.0-20220512170110-6706a2966875 github.com/mdlayher/ndp v1.0.1 diff --git a/go-controller/go.sum b/go-controller/go.sum index 2af1883f7e..d0bde9c817 100644 --- a/go-controller/go.sum +++ b/go-controller/go.sum @@ -199,8 +199,8 @@ github.com/containerd/zfs v1.0.0/go.mod h1:m+m51S1DvAP6r3FcmYCp54bQ34pyOwTieQDNR github.com/containernetworking/cni v0.7.1/go.mod h1:LGwApLUm2FpoOfxTDEeq8T9ipbpZ61X79hmU3w8FmsY= github.com/containernetworking/cni v0.8.0/go.mod h1:LGwApLUm2FpoOfxTDEeq8T9ipbpZ61X79hmU3w8FmsY= github.com/containernetworking/cni v0.8.1/go.mod h1:LGwApLUm2FpoOfxTDEeq8T9ipbpZ61X79hmU3w8FmsY= -github.com/containernetworking/cni v1.1.2 h1:wtRGZVv7olUHMOqouPpn3cXJWpJgM6+EUl31EQbXALQ= -github.com/containernetworking/cni v1.1.2/go.mod h1:sDpYKmGVENF3s6uvMvGgldDWeG8dMxakj/u+i9ht9vw= +github.com/containernetworking/cni v1.2.3 h1:hhOcjNVUQTnzdRJ6alC5XF+wd9mfGIUaj8FuJbEslXM= +github.com/containernetworking/cni v1.2.3/go.mod h1:DuLgF+aPd3DzcTQTtp/Nvl1Kim23oFKdm2okJzBQA5M= github.com/containernetworking/plugins v0.8.6/go.mod h1:qnw5mN19D8fIwkqW7oHHYDHVlzhJpcY6TQxn/fUyDDM= github.com/containernetworking/plugins v0.9.1/go.mod h1:xP/idU2ldlzN6m4p5LmGiwRDjeJr6FLK6vuiUwoH7P8= github.com/containernetworking/plugins v1.2.0 h1:SWgg3dQG1yzUo4d9iD8cwSVh1VqI+bP7mkPDoSfP9VU= @@ -498,8 +498,8 @@ github.com/k8snetworkplumbingwg/ipamclaims v0.5.0-alpha h1:b3iHeks/KTzhG2dNanaUZ github.com/k8snetworkplumbingwg/ipamclaims v0.5.0-alpha/go.mod h1:MGaMX1tJ7MlHDee4/xmqp3guQh+eDiuCLAauqD9K11Q= github.com/k8snetworkplumbingwg/multi-networkpolicy v1.0.1 h1:Egj1hEVYNXWFlKpgzAXxe/2o8VNiVcAJLrKzlinILQo= github.com/k8snetworkplumbingwg/multi-networkpolicy v1.0.1/go.mod 
h1:kEJ4WM849yNmXekuSXLRwb+LaZ9usC06O8JgoAIq+f4= -github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.6.0 h1:BT3ghAY0q7lWib9rz+tVXDFkm27dJV6SLCn7TunZwo4= -github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.6.0/go.mod h1:wxt2YWRVItDtaQmVSmaN5ubE2L1c9CiNoHQwSJnM8Ko= +github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.7.7 h1:z4P744DR+PIpkjwXSEc6TvN3L6LVzmUquFgmNm8wSUc= +github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.7.7/go.mod h1:CM7HAH5PNuIsqjMN0fGc1ydM74Uj+0VZFhob620nklw= github.com/k8snetworkplumbingwg/sriovnet v1.2.1-0.20230427090635-4929697df2dc h1:v6+jUd70AayPbIRgTYUNpnBLG5cBPTY0+10y80CZeMk= github.com/k8snetworkplumbingwg/sriovnet v1.2.1-0.20230427090635-4929697df2dc/go.mod h1:jyWzGe6ZtYiPq6ih6aXCOy6mZ49Y9mNyBOLBBXnli+k= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= @@ -603,7 +603,6 @@ github.com/onsi/ginkgo v1.16.4/go.mod h1:dX+/inL/fNMqNlz0e9LfyB9TswhZpCVdJM/Z6Vv github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU= github.com/onsi/ginkgo/v2 v2.0.0/go.mod h1:vw5CSIxN1JObi/U8gcbwft7ZxR2dgaR70JSE3/PpL4c= -github.com/onsi/ginkgo/v2 v2.1.3/go.mod h1:vw5CSIxN1JObi/U8gcbwft7ZxR2dgaR70JSE3/PpL4c= github.com/onsi/ginkgo/v2 v2.22.0 h1:Yed107/8DjTr0lKCNt7Dn8yQ6ybuDRQoMGrNFKzMfHg= github.com/onsi/ginkgo/v2 v2.22.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo= github.com/onsi/gomega v0.0.0-20151007035656-2152b45fa28a/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= diff --git a/go-controller/pkg/allocator/pod/pod_annotation.go b/go-controller/pkg/allocator/pod/pod_annotation.go index 940952f7ff..31164d3a7c 100644 --- a/go-controller/pkg/allocator/pod/pod_annotation.go +++ b/go-controller/pkg/allocator/pod/pod_annotation.go @@ -207,6 +207,35 @@ func allocatePodAnnotationWithTunnelID( return pod, podAnnotation, nil } +// validateStaticIPRequest checks if a static IP request can be honored when IPAM is enabled for the given network. +func validateStaticIPRequest(netInfo util.NetInfo, network *nadapi.NetworkSelectionElement, ipamClaim *ipamclaimsapi.IPAMClaim, podDesc string) error { + // Allow static IPs with IPAM only for primary networks with layer2 topology when EnablePreconfiguredUDNAddresses is enabled + // Feature gate integration: EnablePreconfiguredUDNAddresses controls static IP allocation with IPAM + if !util.IsPreconfiguredUDNAddressesEnabled() { + // Feature is disabled, reject static IPs with IPAM + return fmt.Errorf("cannot allocate a static IP request with IPAM for pod %s (custom network configuration disabled)", podDesc) + } + if !netInfo.IsPrimaryNetwork() { + // Static IP requests with IPAM are only supported on primary networks + return fmt.Errorf("cannot allocate a static IP request with IPAM for pod %s: only supported on primary networks", podDesc) + } + if netInfo.TopologyType() != types.Layer2Topology { + // Static IP requests with IPAM are only supported on layer2 topology networks. + // On other topologies, we cannot distinguish between already allocated IPs and + // IPs excluded from allocation, making it impossible to safely honor static IP + // requests when IPAM is enabled. 
+ return fmt.Errorf("cannot allocate a static IP request with IPAM for pod %s: layer2 topology is required, but network has topology %q", podDesc, netInfo.TopologyType()) + } + if ipamClaim != nil && len(ipamClaim.Status.IPs) > 0 { + for _, ipRequest := range network.IPRequest { + if !util.IsItemInSlice(ipamClaim.Status.IPs, ipRequest) { + return fmt.Errorf("cannot allocate a static IP request with IPAM for pod %q: the pod references an ipam claim with IPs not containing the requested IP %q", podDesc, ipRequest) + } + } + } + return nil +} + // allocatePodAnnotationWithRollback allocates the PodAnnotation which includes // IPs, a mac address, routes, gateways and an ID. Returns the allocated pod // annotation and a pod with that annotation set. Returns a nil pod and the existing @@ -330,13 +359,11 @@ func allocatePodAnnotationWithRollback( } hasIPAMClaim = ipamClaim != nil && len(ipamClaim.Status.IPs) > 0 } + if hasIPAM && hasStaticIPRequest { - // for now we can't tell apart already allocated IPs from IPs excluded - // from allocation so we can't really honor static IP requests when - // there is IPAM as we don't really know if the requested IP should not - // be allocated or was already allocated by the same pod - err = fmt.Errorf("cannot allocate a static IP request with IPAM for pod %s", podDesc) - return + if err = validateStaticIPRequest(netInfo, network, ipamClaim, podDesc); err != nil { + return + } } // we need to update the annotation if it is missing IPs or MAC @@ -348,6 +375,7 @@ func allocatePodAnnotationWithRollback( if hasIPRequest { tentative.IPs, err = util.ParseIPNets(network.IPRequest) if err != nil { + klog.Warningf("Failed parsing IPRequest %+v for pod %s: %v", network.IPRequest, podDesc, err) return } } else if hasIPAMClaim { diff --git a/go-controller/pkg/allocator/pod/pod_annotation_test.go b/go-controller/pkg/allocator/pod/pod_annotation_test.go index 7930bd0e11..946ca79070 100644 --- a/go-controller/pkg/allocator/pod/pod_annotation_test.go +++ b/go-controller/pkg/allocator/pod/pod_annotation_test.go @@ -109,25 +109,28 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { reallocate bool } tests := []struct { - name string - args args - ipam bool - idAllocation bool - persistentIPAllocation bool - role string - podAnnotation *util.PodAnnotation - invalidNetworkAnnotation bool - wantUpdatedPod bool - wantGeneratedMac bool - wantPodAnnotation *util.PodAnnotation - wantReleasedIPs []*net.IPNet - wantReleasedIPsOnRollback []*net.IPNet - wantReleaseID bool - wantRelasedIDOnRollback bool - wantErr bool - isSingleStackIPv4 bool - isSingleStackIPv6 bool - multiNetworkDisabled bool + name string + args args + netInfo util.NetInfo + nadName string + ipam bool + idAllocation bool + persistentIPAllocation bool + enablePreconfiguredUDNAddresses bool + role string + podAnnotation *util.PodAnnotation + invalidNetworkAnnotation bool + wantUpdatedPod bool + wantGeneratedMac bool + wantPodAnnotation *util.PodAnnotation + wantReleasedIPs []*net.IPNet + wantReleasedIPsOnRollback []*net.IPNet + wantReleaseID bool + wantRelasedIDOnRollback bool + wantErr bool + isSingleStackIPv4 bool + isSingleStackIPv6 bool + multiNetworkDisabled bool }{ { // on secondary L2 networks with no IPAM, we expect to generate a @@ -195,8 +198,9 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { { // on networks with IPAM, expect error if static IP request present // in the network selection annotation - name: "expect error, static ip request, IPAM", - ipam: true, + name: "expect error, 
static ip request, IPAM, non layer2", + netInfo: &util.DefaultNetInfo{}, + nadName: types.DefaultNetworkName, args: args{ network: &nadapi.NetworkSelectionElement{ IPRequest: []string{"192.168.0.3/24"}, @@ -540,9 +544,9 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { wantReleasedIPs: ovntest.MustParseIPNets("192.168.0.3/24"), }, { - // on networks with IPAM, honor a MAC request through the network + // on networks with IPAM, honor a IP and MAC request through the network // selection element - name: "expect requested MAC", + name: "expect requested MAC, IPAM", ipam: true, args: args{ network: &nadapi.NetworkSelectionElement{ @@ -575,6 +579,122 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { wantReleasedIPsOnRollback: ovntest.MustParseIPNets("192.168.0.3/24"), role: types.NetworkRolePrimary, // has to be primary network for default routes to be set }, + { + // on primary networks with IPAM and layer2 topology, expect success when EnablePreconfiguredUDNAddresses is enabled + name: "expect success, static IP and MAC with IPAM on primary network when EnablePreconfiguredUDNAddresses is enabled", + ipam: true, + enablePreconfiguredUDNAddresses: true, + role: types.NetworkRolePrimary, // has to be primary network for default routes to be set + persistentIPAllocation: true, + args: args{ + network: &nadapi.NetworkSelectionElement{ + MacRequest: requestedMAC, + IPRequest: []string{"192.168.0.101/24"}, + }, + ipAllocator: &ipAllocatorStub{ + nextIPs: ovntest.MustParseIPNets("192.168.0.3/24"), + }, + }, + wantUpdatedPod: true, + wantPodAnnotation: &util.PodAnnotation{ + IPs: ovntest.MustParseIPNets("192.168.0.101/24"), + MAC: requestedMACParsed, + Gateways: []net.IP{ovntest.MustParseIP("192.168.0.1").To4()}, + Routes: []util.PodRoute{ + { + Dest: ovntest.MustParseIPNet("100.65.0.0/16"), + NextHop: ovntest.MustParseIP("192.168.0.1").To4(), + }, + }, + Role: types.NetworkRolePrimary, + }, + wantReleasedIPsOnRollback: ovntest.MustParseIPNets("192.168.0.101/24"), + }, + { + // on primary networks with IPAM and layer2 topology, expect success when EnablePreconfiguredUDNAddresses is enabled + name: "expect success, just static IP with IPAM on primary network when EnablePreconfiguredUDNAddresses is enabled", + ipam: true, + enablePreconfiguredUDNAddresses: true, + persistentIPAllocation: true, + role: types.NetworkRolePrimary, + args: args{ + network: &nadapi.NetworkSelectionElement{ + IPRequest: []string{"192.168.0.101/24"}, + }, + ipAllocator: &ipAllocatorStub{ + nextIPs: ovntest.MustParseIPNets("192.168.0.101/24"), + }, + }, + wantUpdatedPod: true, + wantPodAnnotation: &util.PodAnnotation{ + IPs: ovntest.MustParseIPNets("192.168.0.101/24"), + MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.101/24")[0].IP), + Gateways: []net.IP{ovntest.MustParseIP("192.168.0.1").To4()}, + Routes: []util.PodRoute{ + { + Dest: &net.IPNet{ + IP: ovntest.MustParseIP("100.65.0.0").To4(), + Mask: net.CIDRMask(16, 32), + }, + NextHop: ovntest.MustParseIP("192.168.0.1").To4(), + }, + }, + Role: types.NetworkRolePrimary, + }, + wantReleasedIPsOnRollback: ovntest.MustParseIPNets("192.168.0.101/24"), + }, + + { + // on networks with IPAM and layer2 topology, expect error when EnablePreconfiguredUDNAddresses is false + name: "expect error, static IP with IPAM on layer2 when EnablePreconfiguredUDNAddresses is false", + ipam: true, + role: types.NetworkRolePrimary, + persistentIPAllocation: true, + // enablePreconfiguredUDNAddresses defaults to false + args: args{ + network: 
&nadapi.NetworkSelectionElement{ + IPRequest: []string{"192.168.0.101/24"}, + }, + }, + wantErr: true, + }, + { + // on networks with IPAM and layer2 topology, expect error when IPAMClaims status IPs do not match requested IPs + name: "expect error, static IP with IPAM on layer2 when IPAMClaims status IPs do not match requested IPs", + ipam: true, + role: types.NetworkRolePrimary, + persistentIPAllocation: true, + enablePreconfiguredUDNAddresses: true, + args: args{ + network: &nadapi.NetworkSelectionElement{ + IPRequest: []string{"192.168.0.101/24"}, + IPAMClaimReference: "my-ipam-claim", + }, + ipamClaim: &ipamclaimsapi.IPAMClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "my-ipam-claim", + }, + Status: ipamclaimsapi.IPAMClaimStatus{ + IPs: []string{"192.168.0.200/24"}, + }, + }, + }, + wantErr: true, + }, + { + // with preconfigured UDN address feature enabled still continue failing with secondary layer2 with ipam + static IPs + name: "expect error, static IP with IPAM on secondary network when EnablePreconfiguredUDNAddresses is enabled", + ipam: true, + enablePreconfiguredUDNAddresses: true, + persistentIPAllocation: true, + args: args{ + network: &nadapi.NetworkSelectionElement{ + IPRequest: []string{"192.168.0.101/24"}, + }, + }, + role: types.NetworkRoleSecondary, + wantErr: true, + }, { // on networks with IPAM, expect error on an invalid network // selection element @@ -777,6 +897,7 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { config.OVNKubernetesFeature.EnableInterconnect = tt.idAllocation config.OVNKubernetesFeature.EnableMultiNetwork = !tt.multiNetworkDisabled config.OVNKubernetesFeature.EnableNetworkSegmentation = true + config.OVNKubernetesFeature.EnablePreconfiguredUDNAddresses = tt.enablePreconfiguredUDNAddresses config.IPv4Mode = true if tt.isSingleStackIPv6 { config.IPv4Mode = false @@ -785,32 +906,33 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { if tt.isSingleStackIPv4 { config.IPv6Mode = false } - var netInfo util.NetInfo - netInfo = &util.DefaultNetInfo{} - nadName := types.DefaultNetworkName - if !tt.ipam || tt.idAllocation || tt.persistentIPAllocation || tt.args.ipamClaim != nil { - nadName = util.GetNADName(network.Namespace, network.Name) - var subnets string - if tt.ipam { - subnets = "192.168.0.0/24,2001:db8::/64" - if tt.isSingleStackIPv4 { - subnets = "192.168.0.0/24" - } else if tt.isSingleStackIPv6 { - subnets = "2001:db8::/64" + if tt.netInfo == nil { + tt.netInfo = &util.DefaultNetInfo{} + tt.nadName = types.DefaultNetworkName + if !tt.ipam || tt.idAllocation || tt.persistentIPAllocation || tt.args.ipamClaim != nil { + tt.nadName = util.GetNADName(network.Namespace, network.Name) + var subnets string + if tt.ipam { + subnets = "192.168.0.0/24,2001:db8::/64" + if tt.isSingleStackIPv4 { + subnets = "192.168.0.0/24" + } else if tt.isSingleStackIPv6 { + subnets = "2001:db8::/64" + } + } + tt.netInfo, err = util.NewNetInfo(&ovncnitypes.NetConf{ + Topology: types.Layer2Topology, + NetConf: cnitypes.NetConf{ + Name: network.Name, + }, + NADName: tt.nadName, + Subnets: subnets, + AllowPersistentIPs: tt.persistentIPAllocation, + Role: tt.role, + }) + if err != nil { + t.Fatalf("failed to create NetInfo: %v", err) } - } - netInfo, err = util.NewNetInfo(&ovncnitypes.NetConf{ - Topology: types.Layer2Topology, - NetConf: cnitypes.NetConf{ - Name: network.Name, - }, - NADName: nadName, - Subnets: subnets, - AllowPersistentIPs: tt.persistentIPAllocation, - Role: tt.role, - }) - if err != nil { - t.Fatalf("failed to create NetInfo: %v", 
err) } } @@ -836,7 +958,7 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { }, } if tt.podAnnotation != nil { - pod.Annotations, err = util.MarshalPodAnnotation(nil, tt.podAnnotation, nadName) + pod.Annotations, err = util.MarshalPodAnnotation(nil, tt.podAnnotation, tt.nadName) if err != nil { t.Fatalf("failed to set pod annotations: %v", err) } @@ -862,7 +984,7 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { pod, podAnnotation, rollback, err := allocatePodAnnotationWithRollback( tt.args.ipAllocator, tt.args.idAllocator, - netInfo, + tt.netInfo, node, pod, network, @@ -887,7 +1009,7 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { if tt.args.ipAllocator != nil { releasedIPs := tt.args.ipAllocator.(*ipAllocatorStub).releasedIPs - g.Expect(releasedIPs).To(gomega.Equal(tt.wantReleasedIPsOnRollback), "Release IP on rollback behaved unexpectedly") + g.Expect(releasedIPs).To(gomega.Equal(tt.wantReleasedIPsOnRollback), "Release IP on rollback behaved unexpectedly: %s", tt.netInfo.TopologyType()) } if tt.args.idAllocator != nil { diff --git a/go-controller/pkg/clustermanager/clustermanager_test.go b/go-controller/pkg/clustermanager/clustermanager_test.go index f97de8fc3f..66535f4c8a 100644 --- a/go-controller/pkg/clustermanager/clustermanager_test.go +++ b/go-controller/pkg/clustermanager/clustermanager_test.go @@ -34,10 +34,9 @@ const ( var _ = ginkgo.Describe("Cluster Manager", func() { var ( - app *cli.App - f *factory.WatchFactory - stopChan chan struct{} - wg *sync.WaitGroup + app *cli.App + f *factory.WatchFactory + wg *sync.WaitGroup ) const ( @@ -54,12 +53,10 @@ var _ = ginkgo.Describe("Cluster Manager", func() { app = cli.NewApp() app.Name = "test" app.Flags = config.Flags - stopChan = make(chan struct{}) wg = &sync.WaitGroup{} }) ginkgo.AfterEach(func() { - close(stopChan) if f != nil { f.Shutdown() } @@ -1436,4 +1433,102 @@ var _ = ginkgo.Describe("Cluster Manager", func() { }) }) + ginkgo.Context("starting the cluster manager", func() { + const networkName = "default" + + var fakeClient *util.OVNClusterManagerClientset + + ginkgo.BeforeEach(func() { + fakeClient = util.GetOVNClientset().GetClusterManagerClientset() + }) + + ginkgo.When("the required features are not enabled", func() { + ginkgo.It("does *not* automatically provision a NAD for the default network", func() { + app.Action = func(ctx *cli.Context) error { + _, err := config.InitConfig(ctx, nil, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + f, err = factory.NewClusterManagerWatchFactory(fakeClient) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + clusterMngr, err := clusterManager(fakeClient, f) + gomega.Expect(clusterMngr).NotTo(gomega.BeNil()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(clusterMngr.Start(ctx.Context)).To(gomega.Succeed()) + + _, err = fakeClient.NetworkAttchDefClient. + K8sCniCncfIoV1(). + NetworkAttachmentDefinitions(config.Kubernetes.OVNConfigNamespace). 
+ Get( + context.Background(), + networkName, + metav1.GetOptions{}, + ) + gomega.Expect(err).To( + gomega.MatchError("network-attachment-definitions.k8s.cni.cncf.io \"default\" not found"), + ) + + return nil + } + gomega.Expect(app.Run([]string{app.Name})).To(gomega.Succeed()) + }) + }) + + ginkgo.When("the multi-network, network-segmentation, and preconfigured-udn-addresses features are enabled", func() { + ginkgo.BeforeEach(func() { + config.OVNKubernetesFeature.EnableMultiNetwork = true + config.OVNKubernetesFeature.EnableNetworkSegmentation = true + config.OVNKubernetesFeature.EnablePreconfiguredUDNAddresses = true + }) + + ginkgo.It("automatically provisions a NAD for the default network", func() { + app.Action = func(ctx *cli.Context) error { + _, err := config.InitConfig(ctx, nil, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + f, err = factory.NewClusterManagerWatchFactory(fakeClient) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + clusterMngr, err := clusterManager(fakeClient, f) + gomega.Expect(clusterMngr).NotTo(gomega.BeNil()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + c, cancel := context.WithCancel(ctx.Context) + defer cancel() + gomega.Expect(clusterMngr.Start(c)).To(gomega.Succeed()) + defer clusterMngr.Stop() + + nad, err := fakeClient.NetworkAttchDefClient. + K8sCniCncfIoV1(). + NetworkAttachmentDefinitions(config.Kubernetes.OVNConfigNamespace). + Get( + context.Background(), + networkName, + metav1.GetOptions{}, + ) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + const expectedNADContents = `{"cniVersion": "0.4.0", "name": "ovn-kubernetes", "type": "ovn-k8s-cni-overlay"}` + gomega.Expect(nad.Spec.Config).To(gomega.Equal(expectedNADContents)) + + return nil + } + gomega.Expect(app.Run([]string{app.Name})).To(gomega.Succeed()) + }) + }) + }) + }) + +func clusterManager(client *util.OVNClusterManagerClientset, f *factory.WatchFactory) (*ClusterManager, error) { + if err := f.Start(); err != nil { + return nil, fmt.Errorf("failed to start the CM watch factory: %w", err) + } + + clusterMngr, err := NewClusterManager(client, f, "identity", nil) + if err != nil { + return nil, fmt.Errorf("failed to start the CM watch factory: %w", err) + } + + return clusterMngr, nil +} diff --git a/go-controller/pkg/clustermanager/network_cluster_controller.go b/go-controller/pkg/clustermanager/network_cluster_controller.go index ef2ac665ae..f31e9ec8aa 100644 --- a/go-controller/pkg/clustermanager/network_cluster_controller.go +++ b/go-controller/pkg/clustermanager/network_cluster_controller.go @@ -16,6 +16,7 @@ import ( cache "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/record" "k8s.io/client-go/util/retry" + k8snodeutil "k8s.io/component-helpers/node/util" "k8s.io/klog/v2" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/id" @@ -576,7 +577,10 @@ func (h *networkClusterControllerEventHandler) UpdateResource(oldObj, newObj int // 1. we missed an add event (bug in kapi informer code) // 2. 
a user removed the annotation on the node // Either way to play it safe for now do a partial json unmarshal check - if !nodeFailed && util.NoHostSubnet(oldNode) == util.NoHostSubnet(newNode) && !h.ncc.nodeAllocator.NeedsNodeAllocation(newNode) { + _, nodeCondition := k8snodeutil.GetNodeCondition(&newNode.Status, corev1.NodeNetworkUnavailable) + nodeNetworkUnavailable := nodeCondition != nil && nodeCondition.Status == corev1.ConditionTrue + if !nodeFailed && util.NoHostSubnet(oldNode) == util.NoHostSubnet(newNode) && + !h.ncc.nodeAllocator.NeedsNodeAllocation(newNode) && !nodeNetworkUnavailable { // no other node updates would require us to reconcile again return nil } diff --git a/go-controller/pkg/clustermanager/routeadvertisements/controller.go b/go-controller/pkg/clustermanager/routeadvertisements/controller.go index 18fb3dbaae..cffbb3425e 100644 --- a/go-controller/pkg/clustermanager/routeadvertisements/controller.go +++ b/go-controller/pkg/clustermanager/routeadvertisements/controller.go @@ -593,14 +593,13 @@ func (c *Controller) generateFRRConfiguration( matchedNetworks sets.Set[string], ) (*frrtypes.FRRConfiguration, error) { routers := []frrtypes.Router{} - advertisements := sets.New(ra.Spec.Advertisements...) // go over the source routers for i, router := range source.Spec.BGP.Routers { targetVRF := ra.Spec.TargetVRF var matchedVRF, matchedNetwork string - var receivePrefixes, advertisePrefixes []string + var advertisePrefixes []string // We will use the router if: // - the router VRF matches the target VRF @@ -608,33 +607,25 @@ func (c *Controller) generateFRRConfiguration( // Prepare each scenario with a switch statement and check after that switch { case targetVRF == "auto" && router.VRF == "": - // match on default network/VRF, advertise node prefixes and receive - // any prefix of default network. 
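// Illustrative note: after this change the controller only restricts what each neighbor advertises;
// the ToReceive stanza (removed further down in this hunk) is no longer populated. Using the field
// names already present in this file, a generated neighbor is now shaped roughly like:
//
//	neighbor.ToAdvertise = frrtypes.Advertise{
//		Allowed: frrtypes.AllowedOutPrefixes{
//			Mode:     frrtypes.AllowRestricted,
//			Prefixes: advertisePrefixes,
//		},
//	}
//
// with no receive-prefix selectors attached. This is a sketch of the net effect, not code from the patch.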
+ // match on default network/VRF, advertise node prefixes matchedVRF = "" matchedNetwork = types.DefaultNetworkName advertisePrefixes = selectedNetworks.hostNetworkSubnets[matchedNetwork] - receivePrefixes = selectedNetworks.networkSubnets[matchedNetwork] case targetVRF == "auto": - // match router.VRF to network.VRF, advertise node prefixes and - // receive any prefix of the matched network + // match router.VRF to network.VRF, advertise node prefixes matchedVRF = router.VRF matchedNetwork = selectedNetworks.networkVRFs[matchedVRF] advertisePrefixes = selectedNetworks.hostNetworkSubnets[matchedNetwork] - receivePrefixes = selectedNetworks.networkSubnets[matchedNetwork] case targetVRF == "": - // match on default network/VRF, advertise node prefixes and - // receive any prefix of selected networks + // match on default network/VRF, advertise node prefixes matchedVRF = "" matchedNetwork = types.DefaultNetworkName advertisePrefixes = selectedNetworks.hostSubnets - receivePrefixes = selectedNetworks.subnets default: - // match router.VRF to network.VRF, advertise node prefixes and - // receive any prefix of selected networks + // match router.VRF to network.VRF, advertise node prefixes matchedVRF = targetVRF matchedNetwork = selectedNetworks.networkVRFs[matchedVRF] advertisePrefixes = selectedNetworks.hostSubnets - receivePrefixes = selectedNetworks.subnets } if matchedVRF != router.VRF || len(advertisePrefixes) == 0 { // either this router VRF does not match the target VRF or we don't @@ -669,7 +660,6 @@ func (c *Controller) generateFRRConfiguration( isIPV6 := utilnet.IsIPv6String(neighbor.Address) advertisePrefixes := util.MatchAllIPNetsStringFamily(isIPV6, advertisePrefixes) - receivePrefixes := util.MatchAllIPNetsStringFamily(isIPV6, receivePrefixes) if len(advertisePrefixes) == 0 { continue } @@ -680,22 +670,6 @@ func (c *Controller) generateFRRConfiguration( Prefixes: advertisePrefixes, }, } - neighbor.ToReceive = frrtypes.Receive{ - Allowed: frrtypes.AllowedInPrefixes{ - Mode: frrtypes.AllowRestricted, - }, - } - if advertisements.Has(ratypes.PodNetwork) { - for _, prefix := range receivePrefixes { - neighbor.ToReceive.Allowed.Prefixes = append(neighbor.ToReceive.Allowed.Prefixes, - frrtypes.PrefixSelector{ - Prefix: prefix, - LE: selectedNetworks.prefixLength[prefix], - GE: selectedNetworks.prefixLength[prefix], - }, - ) - } - } targetRouter.Neighbors = append(targetRouter.Neighbors, neighbor) } if len(targetRouter.Neighbors) == 0 { @@ -1016,7 +990,7 @@ func (c *Controller) getSelectedNADs(networkSelectors apitypes.NetworkSelectors) case apitypes.DefaultNetwork: // if we are selecting the default networkdefault network label, // make sure a NAD exists for it - nad, err := c.getOrCreateDefaultNetworkNAD() + nad, err := util.EnsureDefaultNetworkNAD(c.nadLister, c.nadClient) if err != nil { return nil, fmt.Errorf("failed to get/create default network NAD: %w", err) } @@ -1047,34 +1021,6 @@ func (c *Controller) getSelectedNADs(networkSelectors apitypes.NetworkSelectors) return selected, nil } -// getOrCreateDefaultNetworkNAD ensure that a well-known NAD exists for the -// default network in ovn-k namespace. 
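// Illustrative note: this helper is being folded into a shared util.EnsureDefaultNetworkNAD(nadLister, nadClient)
// so the same get-or-create logic can be reused by the route-advertisements controller and, when preconfigured
// UDN addresses are enabled, by the UDN controller's Run(). Based on the cluster-manager test earlier in this
// patch, the NAD it provisions carries the config:
//
//	{"cniVersion": "0.4.0", "name": "ovn-kubernetes", "type": "ovn-k8s-cni-overlay"}
//
// where the "type" field follows config.CNI.Plugin, exactly as in the code removed below.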
-func (c *Controller) getOrCreateDefaultNetworkNAD() (*nadtypes.NetworkAttachmentDefinition, error) { - nad, err := c.nadLister.NetworkAttachmentDefinitions(config.Kubernetes.OVNConfigNamespace).Get(types.DefaultNetworkName) - if err != nil && !apierrors.IsNotFound(err) { - return nil, err - } - if nad != nil { - return nad, nil - } - return c.nadClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(config.Kubernetes.OVNConfigNamespace).Create( - context.Background(), - &nadtypes.NetworkAttachmentDefinition{ - ObjectMeta: metav1.ObjectMeta{ - Name: types.DefaultNetworkName, - Namespace: config.Kubernetes.OVNConfigNamespace, - }, - Spec: nadtypes.NetworkAttachmentDefinitionSpec{ - Config: fmt.Sprintf("{\"cniVersion\": \"0.4.0\", \"name\": \"ovn-kubernetes\", \"type\": \"%s\"}", config.CNI.Plugin), - }, - }, - // note we don't set ourselves as field manager for this create as we - // want to process the resulting event that would otherwise be filtered - // out in nadNeedsUpdate - metav1.CreateOptions{}, - ) -} - // getEgressIPsByNodesByNetworks iterates all existing egress IPs that apply to // any of the provided networks and returns a "node -> network -> eips" // map. diff --git a/go-controller/pkg/clustermanager/routeadvertisements/controller_test.go b/go-controller/pkg/clustermanager/routeadvertisements/controller_test.go index 305418425c..c03c851808 100644 --- a/go-controller/pkg/clustermanager/routeadvertisements/controller_test.go +++ b/go-controller/pkg/clustermanager/routeadvertisements/controller_test.go @@ -152,7 +152,6 @@ type testNeighbor struct { ASN uint32 Address string DisableMP *bool - Receive []string Advertise []string } @@ -161,11 +160,6 @@ func (tn testNeighbor) Neighbor() frrapi.Neighbor { ASN: tn.ASN, Address: tn.Address, DisableMP: true, - ToReceive: frrapi.Receive{ - Allowed: frrapi.AllowedInPrefixes{ - Mode: frrapi.AllowRestricted, - }, - }, ToAdvertise: frrapi.Advertise{ Allowed: frrapi.AllowedOutPrefixes{ Mode: frrapi.AllowRestricted, @@ -176,31 +170,6 @@ func (tn testNeighbor) Neighbor() frrapi.Neighbor { if tn.DisableMP != nil { n.DisableMP = *tn.DisableMP } - for _, receive := range tn.Receive { - sep := strings.LastIndex(receive, "/") - if sep == -1 { - continue - } - if isLayer2 := strings.Count(receive, "/") == 1; isLayer2 { - n.ToReceive.Allowed.Prefixes = append(n.ToReceive.Allowed.Prefixes, - frrapi.PrefixSelector{ - Prefix: receive, - }, - ) - continue - } - - first := receive[:sep] - last := receive[sep+1:] - len := ovntest.MustAtoi(last) - n.ToReceive.Allowed.Prefixes = append(n.ToReceive.Allowed.Prefixes, - frrapi.PrefixSelector{ - Prefix: first, - GE: uint32(len), - LE: uint32(len), - }, - ) - } return n } @@ -433,7 +402,7 @@ func TestController_reconcile(t *testing.T) { NodeSelector: map[string]string{"kubernetes.io/hostname": "node"}, Routers: []*testRouter{ {ASN: 1, Prefixes: []string{"1.0.1.1/32", "1.1.0.0/24"}, Neighbors: []*testNeighbor{ - {ASN: 1, Address: "1.0.0.100", Advertise: []string{"1.0.1.1/32", "1.1.0.0/24"}, Receive: []string{"1.1.0.0/16/24"}}, + {ASN: 1, Address: "1.0.0.100", Advertise: []string{"1.0.1.1/32", "1.1.0.0/24"}}, }}, }}, }, @@ -465,8 +434,8 @@ func TestController_reconcile(t *testing.T) { NodeSelector: map[string]string{"kubernetes.io/hostname": "node"}, Routers: []*testRouter{ {ASN: 1, Prefixes: []string{"1.0.1.1/32", "1.1.0.0/24", "fd01::/64", "fd03::ffff:100:101/128"}, Neighbors: []*testNeighbor{ - {ASN: 1, Address: "1.0.0.100", Advertise: []string{"1.0.1.1/32", "1.1.0.0/24"}, Receive: []string{"1.1.0.0/16/24"}}, - 
{ASN: 1, Address: "fd02::ffff:100:64", Advertise: []string{"fd01::/64", "fd03::ffff:100:101/128"}, Receive: []string{"fd01::/48/64"}}, + {ASN: 1, Address: "1.0.0.100", Advertise: []string{"1.0.1.1/32", "1.1.0.0/24"}}, + {ASN: 1, Address: "fd02::ffff:100:64", Advertise: []string{"fd01::/64", "fd03::ffff:100:101/128"}}, }}, }}, }, @@ -503,7 +472,7 @@ func TestController_reconcile(t *testing.T) { NodeSelector: map[string]string{"kubernetes.io/hostname": "node"}, Routers: []*testRouter{ {ASN: 1, Prefixes: []string{"1.2.0.0/24", "1.3.0.0/24", "1.4.0.0/16", "1.5.0.0/16"}, Imports: []string{"black", "blue", "green", "red"}, Neighbors: []*testNeighbor{ - {ASN: 1, Address: "1.0.0.100", Advertise: []string{"1.2.0.0/24", "1.3.0.0/24", "1.4.0.0/16", "1.5.0.0/16"}, Receive: []string{"1.2.0.0/16/24", "1.3.0.0/16/24", "1.4.0.0/16", "1.5.0.0/16"}}, + {ASN: 1, Address: "1.0.0.100", Advertise: []string{"1.2.0.0/24", "1.3.0.0/24", "1.4.0.0/16", "1.5.0.0/16"}}, }}, {ASN: 1, VRF: "black", Imports: []string{"default"}}, {ASN: 1, VRF: "blue", Imports: []string{"default"}}, @@ -636,7 +605,7 @@ func TestController_reconcile(t *testing.T) { NodeSelector: map[string]string{"kubernetes.io/hostname": "node"}, Routers: []*testRouter{ {ASN: 1, Prefixes: []string{"1.0.1.1/32", "1.1.0.0/24"}, Neighbors: []*testNeighbor{ - {ASN: 1, Address: "1.0.0.100", Advertise: []string{"1.0.1.1/32", "1.1.0.0/24"}, Receive: []string{"1.1.0.0/16/24"}}, + {ASN: 1, Address: "1.0.0.100", Advertise: []string{"1.0.1.1/32", "1.1.0.0/24"}}, }}, }, }, @@ -744,13 +713,13 @@ func TestController_reconcile(t *testing.T) { NodeSelector: map[string]string{"kubernetes.io/hostname": "node1"}, Routers: []*testRouter{ {ASN: 1, Prefixes: []string{"1.1.1.0/24"}, Neighbors: []*testNeighbor{ - {ASN: 1, Address: "1.0.0.100", Advertise: []string{"1.1.1.0/24"}, Receive: []string{"1.1.0.0/16/24"}}, + {ASN: 1, Address: "1.0.0.100", Advertise: []string{"1.1.1.0/24"}}, }}, {ASN: 1, VRF: "red", Prefixes: []string{"1.2.1.0/24"}, Neighbors: []*testNeighbor{ - {ASN: 1, Address: "1.0.0.100", Advertise: []string{"1.2.1.0/24"}, Receive: []string{"1.2.0.0/16/24"}}, + {ASN: 1, Address: "1.0.0.100", Advertise: []string{"1.2.1.0/24"}}, }}, {ASN: 1, VRF: "green", Prefixes: []string{"1.4.0.0/16"}, Neighbors: []*testNeighbor{ - {ASN: 1, Address: "1.0.0.100", Advertise: []string{"1.4.0.0/16"}, Receive: []string{"1.4.0.0/16"}}, + {ASN: 1, Address: "1.0.0.100", Advertise: []string{"1.4.0.0/16"}}, }}, }, }, @@ -760,7 +729,7 @@ func TestController_reconcile(t *testing.T) { NodeSelector: map[string]string{"kubernetes.io/hostname": "node2"}, Routers: []*testRouter{ {ASN: 1, Prefixes: []string{"1.1.2.0/24"}, Neighbors: []*testNeighbor{ - {ASN: 1, Address: "1.0.0.100", Advertise: []string{"1.1.2.0/24"}, Receive: []string{"1.1.0.0/16/24"}}, + {ASN: 1, Address: "1.0.0.100", Advertise: []string{"1.1.2.0/24"}}, }}, }, }, @@ -770,10 +739,10 @@ func TestController_reconcile(t *testing.T) { NodeSelector: map[string]string{"kubernetes.io/hostname": "node2"}, Routers: []*testRouter{ {ASN: 1, VRF: "red", Prefixes: []string{"1.2.2.0/24"}, Neighbors: []*testNeighbor{ - {ASN: 1, Address: "1.0.0.100", Advertise: []string{"1.2.2.0/24"}, Receive: []string{"1.2.0.0/16/24"}}, + {ASN: 1, Address: "1.0.0.100", Advertise: []string{"1.2.2.0/24"}}, }}, {ASN: 1, VRF: "green", Prefixes: []string{"1.4.0.0/16"}, Neighbors: []*testNeighbor{ - {ASN: 1, Address: "1.0.0.100", Advertise: []string{"1.4.0.0/16"}, Receive: []string{"1.4.0.0/16"}}, + {ASN: 1, Address: "1.0.0.100", Advertise: []string{"1.4.0.0/16"}}, }}, }, 
}, @@ -799,7 +768,7 @@ func TestController_reconcile(t *testing.T) { NodeSelector: map[string]string{"kubernetes.io/hostname": "node"}, Routers: []*testRouter{ {ASN: 1, Prefixes: []string{"1.0.1.1/32", "1.1.0.0/24"}, Neighbors: []*testNeighbor{ - {ASN: 1, Address: "1.0.0.100", Advertise: []string{"1.0.1.1/32", "1.1.0.0/24"}, Receive: []string{"1.1.0.0/16/24"}}, + {ASN: 1, Address: "1.0.0.100", Advertise: []string{"1.0.1.1/32", "1.1.0.0/24"}}, }}, }, }, @@ -1051,7 +1020,7 @@ func TestController_reconcile(t *testing.T) { g.Expect(err).ToNot(gomega.HaveOccurred()) // prime the default network NAD if defaultNAD == nil { - defaultNAD, err = c.getOrCreateDefaultNetworkNAD() + defaultNAD, err = util.EnsureDefaultNetworkNAD(c.nadLister, c.nadClient) g.Expect(err).ToNot(gomega.HaveOccurred()) // update it with the annotation that network manager would set defaultNAD.Annotations = map[string]string{types.OvnNetworkNameAnnotation: types.DefaultNetworkName} diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/controller.go b/go-controller/pkg/clustermanager/userdefinednetwork/controller.go index 67292bd2ed..14963b9ed9 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/controller.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/controller.go @@ -150,6 +150,12 @@ func (c *Controller) Run() error { return fmt.Errorf("unable to start user-defined network controller: %v", err) } + if util.IsPreconfiguredUDNAddressesEnabled() { + if _, err := util.EnsureDefaultNetworkNAD(c.nadLister, c.nadClient); err != nil { + return fmt.Errorf("failed to ensure default network nad exists: %w", err) + } + } + return nil } diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go b/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go index 0b3aa61194..02c7912e85 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go @@ -113,7 +113,7 @@ func validateTopology(spec SpecGetter) error { if spec.GetTopology() == userdefinednetworkv1.NetworkTopologyLayer3 && spec.GetLayer3() == nil || spec.GetTopology() == userdefinednetworkv1.NetworkTopologyLayer2 && spec.GetLayer2() == nil || spec.GetTopology() == userdefinednetworkv1.NetworkTopologyLocalnet && spec.GetLocalnet() == nil { - return fmt.Errorf("topology %[1]s is specified but %[1]s config is nil", spec.GetTopology()) + return config.NewTopologyConfigMismatchError(string(spec.GetTopology())) } return nil } @@ -142,10 +142,10 @@ func renderCNINetworkConfig(networkName, nadName string, spec SpecGetter) (map[s return nil, err } if ipamEnabled(cfg.IPAM) && len(cfg.Subnets) == 0 { - return nil, fmt.Errorf("subnets is required with ipam.mode is Enabled or unset") + return nil, config.NewSubnetsRequiredError() } if !ipamEnabled(cfg.IPAM) && len(cfg.Subnets) > 0 { - return nil, fmt.Errorf("subnets must be unset when ipam.mode is Disabled") + return nil, config.NewSubnetsMustBeUnsetError() } netConfSpec.Role = strings.ToLower(string(cfg.Role)) @@ -235,7 +235,7 @@ func validateIPAM(ipam *userdefinednetworkv1.IPAMConfig) error { return nil } if ipam.Lifecycle == userdefinednetworkv1.IPAMLifecyclePersistent && !ipamEnabled(ipam) { - return fmt.Errorf("lifecycle Persistent is only supported when ipam.mode is Enabled") + return config.NewIPAMLifecycleNotSupportedError() } return nil } diff --git 
a/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template_test.go b/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template_test.go index ab0593e210..c02109bf0e 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template_test.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template_test.go @@ -9,16 +9,24 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" udnv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" ) var _ = Describe("NetAttachDefTemplate", func() { + + // before each test, set the IPv4Mode and IPv6Mode to true + BeforeEach(func() { + config.IPv4Mode = true + config.IPv6Mode = true + }) + DescribeTable("should fail to render NAD spec given", - func(spec *udnv1.UserDefinedNetworkSpec) { + func(spec *udnv1.UserDefinedNetworkSpec, expectedError string) { _, err := RenderNADSpec("foo", "bar", spec) - Expect(err).To(HaveOccurred()) + Expect(err).To(MatchError(ContainSubstring(expectedError))) }, Entry("invalid layer2 subnets", &udnv1.UserDefinedNetworkSpec{ @@ -27,6 +35,7 @@ var _ = Describe("NetAttachDefTemplate", func() { Subnets: udnv1.DualStackCIDRs{"abc"}, }, }, + config.NewCIDRNotProperlyFormattedError("abc").Error(), ), Entry("invalid layer3 cluster-subnet", &udnv1.UserDefinedNetworkSpec{ @@ -35,6 +44,7 @@ var _ = Describe("NetAttachDefTemplate", func() { Subnets: []udnv1.Layer3Subnet{{CIDR: "!", HostSubnet: 16}}, }, }, + config.NewInvalidCIDRAddressError().Error(), ), Entry("invalid layer3 host-subnet mask", &udnv1.UserDefinedNetworkSpec{ @@ -45,6 +55,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, }, + config.NewHostSubnetMaskError(24, 24).Error(), // -1 is not a valid host subnet mask, it's converted to 24 ), Entry("layer3 host-subnet mask is smaller then cluster-subnet mask", &udnv1.UserDefinedNetworkSpec{ @@ -55,6 +66,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, }, + config.NewHostSubnetMaskError(16, 24).Error(), ), Entry("layer3 host-subnet mask equal to cluster-subnet mask", &udnv1.UserDefinedNetworkSpec{ @@ -65,16 +77,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, }, - ), - Entry("layer3 host-subnet mask is smaller then cluster-subnet mask", - &udnv1.UserDefinedNetworkSpec{ - Topology: udnv1.NetworkTopologyLayer3, - Layer3: &udnv1.Layer3Config{ - Subnets: []udnv1.Layer3Subnet{ - {CIDR: "10.10.0.0/16", HostSubnet: 8}, - }, - }, - }, + config.NewHostSubnetMaskError(24, 24).Error(), ), Entry("invalid layer3 host-subnet; IPv4 mask is bigger then 32", &udnv1.UserDefinedNetworkSpec{ @@ -85,102 +88,98 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, }, + config.NewInvalidIPv4HostSubnetError().Error(), ), - Entry("invalid join subnets", + Entry("invalid layer2 join subnets", &udnv1.UserDefinedNetworkSpec{ Topology: udnv1.NetworkTopologyLayer2, Layer2: &udnv1.Layer2Config{ Role: udnv1.NetworkRolePrimary, + Subnets: udnv1.DualStackCIDRs{"10.10.0.0/24"}, JoinSubnets: udnv1.DualStackCIDRs{"abc"}, }, }, + config.NewCIDRNotProperlyFormattedError("abc").Error(), ), - Entry("invalid dual-stack join subnets, invalid IPv4 CIDR", + Entry("invalid layer2 dual-stack join subnets, invalid IPv4 CIDR", &udnv1.UserDefinedNetworkSpec{ Topology: udnv1.NetworkTopologyLayer2, Layer2: &udnv1.Layer2Config{ Role: udnv1.NetworkRolePrimary, - 
JoinSubnets: udnv1.DualStackCIDRs{"!", "fd50::0/125"}, + Subnets: udnv1.DualStackCIDRs{"10.10.0.0/24"}, + JoinSubnets: udnv1.DualStackCIDRs{"fd50::0/125", "!"}, }, }, + config.NewCIDRNotProperlyFormattedError("!").Error(), ), - Entry("invalid dual-stack join subnets, invalid IPv6 CIDR", + Entry("invalid layer2 dual-stack join subnets, invalid IPv6 CIDR", &udnv1.UserDefinedNetworkSpec{ Topology: udnv1.NetworkTopologyLayer2, Layer2: &udnv1.Layer2Config{ Role: udnv1.NetworkRolePrimary, + Subnets: udnv1.DualStackCIDRs{"10.10.0.0/24"}, JoinSubnets: udnv1.DualStackCIDRs{"10.10.0.0/24", "!"}, }, }, + config.NewCIDRNotProperlyFormattedError("!").Error(), ), - Entry("invalid dual-stack join subnets, multiple valid IPv4 CIDRs", - &udnv1.UserDefinedNetworkSpec{ - Topology: udnv1.NetworkTopologyLayer2, - Layer2: &udnv1.Layer2Config{ - Role: udnv1.NetworkRolePrimary, - JoinSubnets: udnv1.DualStackCIDRs{"10.10.0.0/24", "10.20.0.0/24", "10.30.0.0/24"}, - }, - }, - ), - Entry("invalid dual-stack join subnets, multiple valid IPv6 CIDRs", - &udnv1.UserDefinedNetworkSpec{ - Topology: udnv1.NetworkTopologyLayer2, - Layer2: &udnv1.Layer2Config{ - Role: udnv1.NetworkRolePrimary, - JoinSubnets: udnv1.DualStackCIDRs{"fd40::0/125", "fd10::0/125", "fd50::0/125"}, - }, - }, - ), - Entry("invalid dual-stack join subnets, multiple valid IPv4 & IPv6 CIDRs", - &udnv1.UserDefinedNetworkSpec{ - Topology: udnv1.NetworkTopologyLayer2, - Layer2: &udnv1.Layer2Config{ - Role: udnv1.NetworkRolePrimary, - JoinSubnets: udnv1.DualStackCIDRs{"fd40::0/125", "10.10.0.0/24", "fd50::0/125", "10.20.0.0/24"}, - }, - }, - ), + // The validation for max number of subnets is moved to the CRD validation, + // no need to test it here. Entry("invalid join subnets, overlapping with cluster-default join-subnet, IPv4", &udnv1.UserDefinedNetworkSpec{ Topology: udnv1.NetworkTopologyLayer2, Layer2: &udnv1.Layer2Config{ Role: udnv1.NetworkRolePrimary, + Subnets: udnv1.DualStackCIDRs{"10.10.0.0/24"}, JoinSubnets: udnv1.DualStackCIDRs{"100.64.10.0/24"}, }, }, + config.NewSubnetOverlapError( + config.ConfigSubnet{SubnetType: config.UserDefinedJoinSubnet, Subnet: util.MustParseCIDR("100.64.10.0/24")}, + config.ConfigSubnet{SubnetType: config.ConfigSubnetJoin, Subnet: util.MustParseCIDR("100.64.0.0/16")}).Error(), ), Entry("invalid join subnets, overlapping with cluster-default join-subnet, IPv6", &udnv1.UserDefinedNetworkSpec{ Topology: udnv1.NetworkTopologyLayer2, Layer2: &udnv1.Layer2Config{ Role: udnv1.NetworkRolePrimary, + Subnets: udnv1.DualStackCIDRs{"10.10.0.0/24"}, JoinSubnets: udnv1.DualStackCIDRs{"fd98::4/127"}, }, }, + config.NewSubnetOverlapError( + config.ConfigSubnet{SubnetType: config.UserDefinedJoinSubnet, Subnet: util.MustParseCIDR("fd98::4/127")}, + config.ConfigSubnet{SubnetType: config.ConfigSubnetJoin, Subnet: util.MustParseCIDR("fd98::/64")}).Error(), ), Entry("invalid join subnets, overlapping with cluster-default join-subnet, dual-stack", &udnv1.UserDefinedNetworkSpec{ Topology: udnv1.NetworkTopologyLayer2, Layer2: &udnv1.Layer2Config{ Role: udnv1.NetworkRolePrimary, + Subnets: udnv1.DualStackCIDRs{"10.10.0.0/24"}, JoinSubnets: udnv1.DualStackCIDRs{"100.64.10.0/24", "fd98::4/127"}, }, }, + config.NewSubnetOverlapError( + config.ConfigSubnet{SubnetType: config.UserDefinedJoinSubnet, Subnet: util.MustParseCIDR("100.64.10.0/24")}, + config.ConfigSubnet{SubnetType: config.ConfigSubnetJoin, Subnet: util.MustParseCIDR("100.64.0.0/16")}).Error(), ), ) DescribeTable("should fail to render NAD manifest, given", - func(obj client.Object) { 
+ func(obj client.Object, expectedError string) { _, err := RenderNetAttachDefManifest(obj, "test") - Expect(err).To(HaveOccurred()) + Expect(err).To(MatchError(ContainSubstring(expectedError))) }, Entry("UDN, invalid topology: topology layer2 & layer3 config", &udnv1.UserDefinedNetwork{Spec: udnv1.UserDefinedNetworkSpec{ Topology: udnv1.NetworkTopologyLayer2, Layer3: &udnv1.Layer3Config{}}}, + config.NewTopologyConfigMismatchError(string(udnv1.NetworkTopologyLayer2)).Error(), ), Entry("UDN, invalid topology: topology layer3 & layer2 config", &udnv1.UserDefinedNetwork{Spec: udnv1.UserDefinedNetworkSpec{ Topology: udnv1.NetworkTopologyLayer3, Layer2: &udnv1.Layer2Config{}}}, + config.NewTopologyConfigMismatchError(string(udnv1.NetworkTopologyLayer3)).Error(), ), Entry("UDN, invalid IPAM config: IPAM lifecycle & disabled ipam mode", &udnv1.UserDefinedNetwork{Spec: udnv1.UserDefinedNetworkSpec{ @@ -194,6 +193,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, }}, + config.NewIPAMLifecycleNotSupportedError().Error(), ), Entry("UDN, invalid IPAM config: IPAM enabled & no subnet", &udnv1.UserDefinedNetwork{Spec: udnv1.UserDefinedNetworkSpec{ @@ -206,6 +206,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, }}, + config.NewSubnetsRequiredError().Error(), ), Entry("UDN, invalid IPAM config: IPAM disabled & subnet", &udnv1.UserDefinedNetwork{Spec: udnv1.UserDefinedNetworkSpec{ @@ -218,39 +219,57 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, }}, + config.NewSubnetsMustBeUnsetError().Error(), ), Entry("CUDN, invalid topology: topology layer2 & layer3 config", &udnv1.ClusterUserDefinedNetwork{Spec: udnv1.ClusterUserDefinedNetworkSpec{Network: udnv1.NetworkSpec{ Topology: udnv1.NetworkTopologyLayer2, Layer3: &udnv1.Layer3Config{}}}}, + config.NewTopologyConfigMismatchError(string(udnv1.NetworkTopologyLayer2)).Error(), ), Entry("CUDN, invalid topology: topology layer2 & localnet config", &udnv1.ClusterUserDefinedNetwork{Spec: udnv1.ClusterUserDefinedNetworkSpec{Network: udnv1.NetworkSpec{ Topology: udnv1.NetworkTopologyLayer2, Localnet: &udnv1.LocalnetConfig{}}}}, + config.NewTopologyConfigMismatchError(string(udnv1.NetworkTopologyLayer2)).Error(), ), Entry("CUDN, invalid topology: topology layer3 & layer2 config", &udnv1.ClusterUserDefinedNetwork{Spec: udnv1.ClusterUserDefinedNetworkSpec{Network: udnv1.NetworkSpec{ Topology: udnv1.NetworkTopologyLayer3, Layer2: &udnv1.Layer2Config{}}}}, + config.NewTopologyConfigMismatchError(string(udnv1.NetworkTopologyLayer3)).Error(), ), Entry("CUDN, invalid topology: topology layer3 & localnet config", &udnv1.ClusterUserDefinedNetwork{Spec: udnv1.ClusterUserDefinedNetworkSpec{Network: udnv1.NetworkSpec{ Topology: udnv1.NetworkTopologyLayer3, Localnet: &udnv1.LocalnetConfig{}}}}, + config.NewTopologyConfigMismatchError(string(udnv1.NetworkTopologyLayer3)).Error(), ), Entry("CUDN, invalid topology: topology localnet & layer2 config", &udnv1.ClusterUserDefinedNetwork{Spec: udnv1.ClusterUserDefinedNetworkSpec{Network: udnv1.NetworkSpec{ Topology: udnv1.NetworkTopologyLocalnet, Layer2: &udnv1.Layer2Config{}}}}, + config.NewTopologyConfigMismatchError(string(udnv1.NetworkTopologyLocalnet)).Error(), ), Entry("CUDN, invalid topology: topology localnet & layer3 config", &udnv1.ClusterUserDefinedNetwork{Spec: udnv1.ClusterUserDefinedNetworkSpec{Network: udnv1.NetworkSpec{ Topology: udnv1.NetworkTopologyLocalnet, Layer3: &udnv1.Layer3Config{}}}}, + config.NewTopologyConfigMismatchError(string(udnv1.NetworkTopologyLocalnet)).Error(), + ), + 
Entry("CUDN, localnet: IPv4 excludeSubnets not in range of subnets", + &udnv1.ClusterUserDefinedNetwork{Spec: udnv1.ClusterUserDefinedNetworkSpec{Network: udnv1.NetworkSpec{ + Topology: udnv1.NetworkTopologyLocalnet, + Localnet: &udnv1.LocalnetConfig{Role: udnv1.NetworkRoleSecondary, PhysicalNetworkName: "localnet1", + Subnets: udnv1.DualStackCIDRs{"192.168.0.0/16", "2001:dbb::/64"}, + ExcludeSubnets: []udnv1.CIDR{"192.200.0.0/30"}, + }, + }}}, + config.NewExcludedSubnetNotContainedError("192.200.0.0/30").Error(), ), - Entry("CUDN, localnet: excludeSubnets not in range of subnets", + Entry("CUDN, localnet: IPv6 excludeSubnets not in range of subnets", &udnv1.ClusterUserDefinedNetwork{Spec: udnv1.ClusterUserDefinedNetworkSpec{Network: udnv1.NetworkSpec{ Topology: udnv1.NetworkTopologyLocalnet, Localnet: &udnv1.LocalnetConfig{Role: udnv1.NetworkRoleSecondary, PhysicalNetworkName: "localnet1", Subnets: udnv1.DualStackCIDRs{"192.168.0.0/16", "2001:dbb::/64"}, - ExcludeSubnets: []udnv1.CIDR{"192.200.0.0/30", "2001:aaa::/127", "192.300.0.1/32", "2001:bbb::1/120"}, + ExcludeSubnets: []udnv1.CIDR{"2001:aaa::/127"}, }, }}}, + config.NewExcludedSubnetNotContainedError("2001:aaa::/127").Error(), ), ) diff --git a/go-controller/pkg/config/errors.go b/go-controller/pkg/config/errors.go new file mode 100644 index 0000000000..9f634c2509 --- /dev/null +++ b/go-controller/pkg/config/errors.go @@ -0,0 +1,104 @@ +package config + +import "fmt" + +type ValidationErrorType string + +const ( + ErrCIDRNotProperlyFormatted ValidationErrorType = "CIDRNotProperlyFormatted" + ErrInvalidCIDRAddress ValidationErrorType = "InvalidCIDRAddress" + ErrHostSubnetMask ValidationErrorType = "HostSubnetMask" + ErrInvalidIPv4HostSubnet ValidationErrorType = "InvalidIPv4HostSubnet" + ErrSubnetOverlap ValidationErrorType = "SubnetOverlap" + ErrExcludedSubnetNotContained ValidationErrorType = "ExcludedSubnetNotContained" + ErrTopologyConfigMismatch ValidationErrorType = "TopologyConfigMismatch" + ErrIPAMLifecycleNotSupported ValidationErrorType = "IPAMLifecycleNotSupported" + ErrSubnetsRequired ValidationErrorType = "SubnetsRequired" + ErrSubnetsMustBeUnset ValidationErrorType = "SubnetsMustBeUnset" +) + +type ValidationError struct { + Type ValidationErrorType + Message string +} + +func (e *ValidationError) Error() string { + return e.Message +} + +// CIDR Validation Errors +func NewCIDRNotProperlyFormattedError(cidr string) *ValidationError { + return &ValidationError{ + Type: ErrCIDRNotProperlyFormatted, + Message: fmt.Sprintf("CIDR %q not properly formatted", cidr), + } +} + +func NewInvalidCIDRAddressError() *ValidationError { + return &ValidationError{ + Type: ErrInvalidCIDRAddress, + Message: "invalid CIDR address", + } +} + +// Subnet Validation Errors +func NewHostSubnetMaskError(hostSubnetLength, clusterSubnetLength int) *ValidationError { + return &ValidationError{ + Type: ErrHostSubnetMask, + Message: fmt.Sprintf("cannot use a host subnet length mask shorter than or equal to the cluster subnet mask. 
"+ + "host subnet length: %d, cluster subnet length: %d", hostSubnetLength, clusterSubnetLength), + } +} + +func NewInvalidIPv4HostSubnetError() *ValidationError { + return &ValidationError{ + Type: ErrInvalidIPv4HostSubnet, + Message: "invalid host subnet, IPv4 subnet must be < 32", + } +} + +func NewSubnetOverlapError(a, b ConfigSubnet) *ValidationError { + return &ValidationError{ + Type: ErrSubnetOverlap, + Message: fmt.Sprintf("%s %q overlaps %s %q", + a.SubnetType, a.Subnet.String(), + b.SubnetType, b.Subnet.String()), + } +} + +func NewExcludedSubnetNotContainedError(excludeSubnet interface{}) *ValidationError { + return &ValidationError{ + Type: ErrExcludedSubnetNotContained, + Message: fmt.Sprintf("the provided network subnets do not contain excluded subnets %v", excludeSubnet), + } +} + +// Topology Validation Errors +func NewTopologyConfigMismatchError(topology string) *ValidationError { + return &ValidationError{ + Type: ErrTopologyConfigMismatch, + Message: fmt.Sprintf("topology %[1]s is specified but %[1]s config is nil", topology), + } +} + +// IPAM Validation Errors +func NewIPAMLifecycleNotSupportedError() *ValidationError { + return &ValidationError{ + Type: ErrIPAMLifecycleNotSupported, + Message: "lifecycle Persistent is only supported when ipam.mode is Enabled", + } +} + +func NewSubnetsRequiredError() *ValidationError { + return &ValidationError{ + Type: ErrSubnetsRequired, + Message: "subnets is required with ipam.mode is Enabled or unset", + } +} + +func NewSubnetsMustBeUnsetError() *ValidationError { + return &ValidationError{ + Type: ErrSubnetsMustBeUnset, + Message: "subnets must be unset when ipam.mode is Disabled", + } +} diff --git a/go-controller/pkg/config/utils.go b/go-controller/pkg/config/utils.go index f0f0ff1a6b..20f4e0b35c 100644 --- a/go-controller/pkg/config/utils.go +++ b/go-controller/pkg/config/utils.go @@ -62,7 +62,7 @@ func ParseClusterSubnetEntriesWithDefaults(clusterSubnetCmd string, ipv4HostLeng splitClusterEntry := strings.Split(clusterEntry, "/") if len(splitClusterEntry) < 2 || len(splitClusterEntry) > 3 { - return nil, fmt.Errorf("CIDR %q not properly formatted", clusterEntry) + return nil, NewCIDRNotProperlyFormattedError(clusterEntry) } var err error @@ -78,7 +78,7 @@ func ParseClusterSubnetEntriesWithDefaults(clusterSubnetCmd string, ipv4HostLeng entryMaskLength, _ := parsedClusterEntry.CIDR.Mask.Size() if len(splitClusterEntry) == 3 { if !hostLengthAllowed { - return nil, fmt.Errorf("CIDR %q not properly formatted", clusterEntry) + return nil, NewCIDRNotProperlyFormattedError(clusterEntry) } tmp, err := strconv.Atoi(splitClusterEntry[2]) if err != nil { @@ -100,12 +100,11 @@ func ParseClusterSubnetEntriesWithDefaults(clusterSubnetCmd string, ipv4HostLeng } if !ipv6 && parsedClusterEntry.HostSubnetLength > 32 { - return nil, fmt.Errorf("invalid host subnet, IPv4 subnet must be < 32") + return nil, NewInvalidIPv4HostSubnetError() } if parsedClusterEntry.HostSubnetLength <= entryMaskLength { - return nil, fmt.Errorf("cannot use a host subnet length mask shorter than or equal to the cluster subnet mask. 
"+ - "host subnet length: %d, cluster subnet length: %d", parsedClusterEntry.HostSubnetLength, entryMaskLength) + return nil, NewHostSubnetMaskError(parsedClusterEntry.HostSubnetLength, entryMaskLength) } } @@ -216,9 +215,7 @@ func (cs *ConfigSubnets) CheckForOverlaps() error { for j := 0; j < i; j++ { sj := cs.Subnets[j] if si.Subnet.Contains(sj.Subnet.IP) || sj.Subnet.Contains(si.Subnet.IP) { - return fmt.Errorf("illegal network configuration: %s %q overlaps %s %q", - si.SubnetType, si.Subnet.String(), - sj.SubnetType, sj.Subnet.String()) + return NewSubnetOverlapError(si, sj) } } } diff --git a/go-controller/pkg/libovsdb/ops/router.go b/go-controller/pkg/libovsdb/ops/router.go index 18b3931a1f..5f0ce594d4 100644 --- a/go-controller/pkg/libovsdb/ops/router.go +++ b/go-controller/pkg/libovsdb/ops/router.go @@ -932,6 +932,11 @@ func RemoveLoadBalancersFromLogicalRouterOps(nbClient libovsdbclient.Client, ops return ops, err } +func getNATMutableFields(nat *nbdb.NAT) []interface{} { + return []interface{}{&nat.Type, &nat.ExternalIP, &nat.LogicalIP, &nat.LogicalPort, &nat.ExternalMAC, + &nat.ExternalIDs, &nat.Match, &nat.Options, &nat.ExternalPortRange, &nat.GatewayPort, &nat.Priority} +} + func buildNAT( natType nbdb.NATType, externalIP string, @@ -1035,7 +1040,7 @@ func BuildDNATAndSNATWithMatch( // isEquivalentNAT checks if the `searched` NAT is equivalent to `existing`. // Returns true if the UUID is set in `searched` and matches the UUID of `existing`. // Otherwise, perform the following checks: -// - Compare the Type and Match fields. +// - Compare the Type. // - Compare ExternalIP if it is set in `searched`. // - Compare LogicalIP if the Type in `searched` is SNAT. // - Compare LogicalPort if it is set in `searched`. @@ -1050,10 +1055,6 @@ func isEquivalentNAT(existing *nbdb.NAT, searched *nbdb.NAT) bool { return false } - if searched.Match != existing.Match { - return false - } - // Compare externalIP if it's not empty. if searched.ExternalIP != "" && searched.ExternalIP != existing.ExternalIP { return false @@ -1156,7 +1157,7 @@ func CreateOrUpdateNATsOps(nbClient libovsdbclient.Client, ops []ovsdb.Operation } opModel := operationModel{ Model: inputNat, - OnModelUpdates: onModelUpdatesAllNonDefault(), + OnModelUpdates: getNATMutableFields(inputNat), ErrNotFound: false, BulkOp: false, DoAfter: func() { router.Nat = append(router.Nat, inputNat.UUID) }, @@ -1284,7 +1285,7 @@ func UpdateNATOps(nbClient libovsdbclient.Client, ops []ovsdb.Operation, nats .. opModel := []operationModel{ { Model: nat, - OnModelUpdates: onModelUpdatesAllNonDefault(), + OnModelUpdates: getNATMutableFields(nat), ErrNotFound: true, BulkOp: false, }, diff --git a/go-controller/pkg/node/bridgeconfig/bridgeflows.go b/go-controller/pkg/node/bridgeconfig/bridgeflows.go index d03b88c8de..200c1540ec 100644 --- a/go-controller/pkg/node/bridgeconfig/bridgeflows.go +++ b/go-controller/pkg/node/bridgeconfig/bridgeflows.go @@ -349,13 +349,12 @@ func (b *BridgeConfiguration) flowsForDefaultBridge(extraIPs []net.IP) ([]string bridgeMacAddress, mod_vlan_id, defaultNetConfig.OfPortPatch)) // table 2, priority 200, dispatch from UDN -> Host -> OVN. These packets have - // already been SNATed to the UDN's masq IP or have been marked with the UDN's packet mark. + // already been SNATed to the UDN's masquerade IP or have been marked with the UDN's packet mark. 
if config.IPv4Mode { for _, netConfig := range b.patchedNetConfigs() { if netConfig.IsDefaultNetwork() { continue } - srcIPOrSubnet := netConfig.V4MasqIPs.ManagementPort.IP.String() if util.IsRouteAdvertisementsEnabled() && netConfig.Advertised.Load() { var udnAdvertisedSubnets []*net.IPNet for _, clusterEntry := range netConfig.Subnets { @@ -368,9 +367,14 @@ func (b *BridgeConfiguration) flowsForDefaultBridge(extraIPs []net.IP) ([]string klog.Infof("Unable to determine IPV4 UDN subnet for the provided family isIPV6: %v", err) continue } - - // Use the filtered subnets for the flow compute instead of the masqueradeIP - srcIPOrSubnet = matchingIPFamilySubnet.String() + // In addition to the masqueradeIP based flows, we also need the podsubnet based flows for + // advertised networks since UDN pod to clusterIP is unSNATed and we need this traffic to be taken into + // the correct patch port of it's own network where it's a deadend if the clusterIP is not part of + // that UDN network and works if it is part of the UDN network. + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=200, table=2, ip, ip_src=%s, "+ + "actions=drop", + nodetypes.DefaultOpenFlowCookie, matchingIPFamilySubnet.String())) } // Drop traffic coming from the masquerade IP or the UDN subnet(for advertised UDNs) to ensure that // isolation between networks is enforced. This handles the case where a pod on the UDN subnet is sending traffic to @@ -378,7 +382,7 @@ func (b *BridgeConfiguration) flowsForDefaultBridge(extraIPs []net.IP) ([]string dftFlows = append(dftFlows, fmt.Sprintf("cookie=%s, priority=200, table=2, ip, ip_src=%s, "+ "actions=drop", - nodetypes.DefaultOpenFlowCookie, srcIPOrSubnet)) + nodetypes.DefaultOpenFlowCookie, netConfig.V4MasqIPs.ManagementPort.IP.String())) dftFlows = append(dftFlows, fmt.Sprintf("cookie=%s, priority=250, table=2, ip, pkt_mark=%s, "+ @@ -393,7 +397,6 @@ func (b *BridgeConfiguration) flowsForDefaultBridge(extraIPs []net.IP) ([]string if netConfig.IsDefaultNetwork() { continue } - srcIPOrSubnet := netConfig.V6MasqIPs.ManagementPort.IP.String() if util.IsRouteAdvertisementsEnabled() && netConfig.Advertised.Load() { var udnAdvertisedSubnets []*net.IPNet for _, clusterEntry := range netConfig.Subnets { @@ -407,13 +410,15 @@ func (b *BridgeConfiguration) flowsForDefaultBridge(extraIPs []net.IP) ([]string continue } - // Use the filtered subnets for the flow compute instead of the masqueradeIP - srcIPOrSubnet = matchingIPFamilySubnet.String() + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=200, table=2, ip6, ipv6_src=%s, "+ + "actions=drop", + nodetypes.DefaultOpenFlowCookie, matchingIPFamilySubnet.String())) } dftFlows = append(dftFlows, fmt.Sprintf("cookie=%s, priority=200, table=2, ip6, ipv6_src=%s, "+ "actions=drop", - nodetypes.DefaultOpenFlowCookie, srcIPOrSubnet)) + nodetypes.DefaultOpenFlowCookie, netConfig.V6MasqIPs.ManagementPort.IP.String())) dftFlows = append(dftFlows, fmt.Sprintf("cookie=%s, priority=250, table=2, ip6, pkt_mark=%s, "+ "actions=set_field:%s->eth_dst,output:%s", diff --git a/go-controller/pkg/node/default_node_network_controller.go b/go-controller/pkg/node/default_node_network_controller.go index f1281980a8..47ba8f6262 100644 --- a/go-controller/pkg/node/default_node_network_controller.go +++ b/go-controller/pkg/node/default_node_network_controller.go @@ -188,7 +188,7 @@ func NewDefaultNodeNetworkController(cnnci *CommonNodeNetworkControllerInfo, net nc.initRetryFrameworkForNode() - err = setupPMTUDNFTSets() + err = 
setupRemoteNodeNFTSets() if err != nil { return nil, fmt.Errorf("failed to setup PMTUD nftables sets: %w", err) } @@ -1515,25 +1515,34 @@ func (nc *DefaultNodeNetworkController) WatchNodes() error { func (nc *DefaultNodeNetworkController) addOrUpdateNode(node *corev1.Node) error { var nftElems []*knftables.Element var addrs []string - for _, address := range node.Status.Addresses { - if address.Type != corev1.NodeInternalIP { - continue - } - nodeIP := net.ParseIP(address.Address) - if nodeIP == nil { - continue - } + // Use GetNodeAddresses to get all node IPs (including current node for openflow) + ipsv4, ipsv6, err := util.GetNodeAddresses(config.IPv4Mode, config.IPv6Mode, node) + if err != nil { + return fmt.Errorf("failed to get node addresses for node %q: %w", node.Name, err) + } + + // Process IPv4 addresses + for _, nodeIP := range ipsv4 { addrs = append(addrs, nodeIP.String()) klog.Infof("Adding remote node %q, IP: %s to PMTUD blocking rules", node.Name, nodeIP) - if utilnet.IsIPv4(nodeIP) { + // Only add to nftables if this is remote node + if node.Name != nc.name { nftElems = append(nftElems, &knftables.Element{ - Set: types.NFTNoPMTUDRemoteNodeIPsv4, + Set: types.NFTRemoteNodeIPsv4, Key: []string{nodeIP.String()}, }) - } else { + } + } + + // Process IPv6 addresses + for _, nodeIP := range ipsv6 { + addrs = append(addrs, nodeIP.String()) + klog.Infof("Adding remote node %q, IP: %s to PMTUD blocking rules", node.Name, nodeIP) + // Only add to nftables if this is remote node + if node.Name != nc.name { nftElems = append(nftElems, &knftables.Element{ - Set: types.NFTNoPMTUDRemoteNodeIPsv6, + Set: types.NFTRemoteNodeIPsv6, Key: []string{nodeIP.String()}, }) } @@ -1557,12 +1566,12 @@ func removePMTUDNodeNFTRules(nodeIPs []net.IP) error { // Remove IPs from NFT sets if utilnet.IsIPv4(nodeIP) { nftElems = append(nftElems, &knftables.Element{ - Set: types.NFTNoPMTUDRemoteNodeIPsv4, + Set: types.NFTRemoteNodeIPsv4, Key: []string{nodeIP.String()}, }) } else { nftElems = append(nftElems, &knftables.Element{ - Set: types.NFTNoPMTUDRemoteNodeIPsv6, + Set: types.NFTRemoteNodeIPsv6, Key: []string{nodeIP.String()}, }) } @@ -1578,18 +1587,18 @@ func removePMTUDNodeNFTRules(nodeIPs []net.IP) error { func (nc *DefaultNodeNetworkController) deleteNode(node *corev1.Node) { gw := nc.Gateway.(*gateway) gw.openflowManager.deleteFlowsByKey(getPMTUDKey(node.Name)) - ipsToRemove := make([]net.IP, 0) - for _, address := range node.Status.Addresses { - if address.Type != corev1.NodeInternalIP { - continue - } - nodeIP := net.ParseIP(address.Address) - if nodeIP == nil { - continue - } - ipsToRemove = append(ipsToRemove, nodeIP) + + // Use GetNodeAddresses to get node IPs + ipsv4, ipsv6, err := util.GetNodeAddresses(config.IPv4Mode, config.IPv6Mode, node) + if err != nil { + klog.Errorf("Failed to get node addresses for node %q: %v", node.Name, err) + return } + ipsToRemove := make([]net.IP, 0, len(ipsv4)+len(ipsv6)) + ipsToRemove = append(ipsToRemove, ipsv4...) + ipsToRemove = append(ipsToRemove, ipsv6...) 
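// Illustrative note: node IPs are now gathered with util.GetNodeAddresses(config.IPv4Mode, config.IPv6Mode, node)
// instead of walking node.Status.Addresses directly, which appears to be why the unit tests below now set the
// util.OVNNodeHostCIDRs annotation on their fake nodes. Each address of a remote node becomes an element of the
// renamed sets, for example (address taken from the test expectations below):
//
//	add element inet ovn-kubernetes remote-node-ips-v4 { 169.254.254.61 }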
+ klog.Infof("Deleting NFT elements for node: %s", node.Name) if err := removePMTUDNodeNFTRules(ipsToRemove); err != nil { klog.Errorf("Failed to delete nftables rules for PMTUD blocking for node %q: %v", node.Name, err) @@ -1610,33 +1619,34 @@ func (nc *DefaultNodeNetworkController) syncNodes(objs []interface{}) error { if node.Name == nc.name { continue } - for _, address := range node.Status.Addresses { - if address.Type != corev1.NodeInternalIP { - continue - } - nodeIP := net.ParseIP(address.Address) - if nodeIP == nil { - continue - } - // Remove IPs from NFT sets - if utilnet.IsIPv4(nodeIP) { - keepNFTSetElemsV4 = append(keepNFTSetElemsV4, &knftables.Element{ - Set: types.NFTNoPMTUDRemoteNodeIPsv4, - Key: []string{nodeIP.String()}, - }) - } else { - keepNFTSetElemsV6 = append(keepNFTSetElemsV6, &knftables.Element{ - Set: types.NFTNoPMTUDRemoteNodeIPsv6, - Key: []string{nodeIP.String()}, - }) - } + // Use GetNodeAddresses to get node IPs + ipsv4, ipsv6, err := util.GetNodeAddresses(config.IPv4Mode, config.IPv6Mode, node) + if err != nil { + klog.Errorf("Failed to get node addresses for node %q: %v", node.Name, err) + continue + } + + // Process IPv4 addresses + for _, nodeIP := range ipsv4 { + keepNFTSetElemsV4 = append(keepNFTSetElemsV4, &knftables.Element{ + Set: types.NFTRemoteNodeIPsv4, + Key: []string{nodeIP.String()}, + }) + } + + // Process IPv6 addresses + for _, nodeIP := range ipsv6 { + keepNFTSetElemsV6 = append(keepNFTSetElemsV6, &knftables.Element{ + Set: types.NFTRemoteNodeIPsv6, + Key: []string{nodeIP.String()}, + }) } } - if err := recreateNFTSet(types.NFTNoPMTUDRemoteNodeIPsv4, keepNFTSetElemsV4); err != nil { + if err := recreateNFTSet(types.NFTRemoteNodeIPsv4, keepNFTSetElemsV4); err != nil { errors = append(errors, err) } - if err := recreateNFTSet(types.NFTNoPMTUDRemoteNodeIPsv6, keepNFTSetElemsV6); err != nil { + if err := recreateNFTSet(types.NFTRemoteNodeIPsv6, keepNFTSetElemsV6); err != nil { errors = append(errors, err) } diff --git a/go-controller/pkg/node/default_node_network_controller_test.go b/go-controller/pkg/node/default_node_network_controller_test.go index a1413a7dd1..366ee881d6 100644 --- a/go-controller/pkg/node/default_node_network_controller_test.go +++ b/go-controller/pkg/node/default_node_network_controller_test.go @@ -38,18 +38,18 @@ import ( const v4PMTUDNFTRules = ` add table inet ovn-kubernetes -add rule inet ovn-kubernetes no-pmtud ip daddr @no-pmtud-remote-node-ips-v4 meta l4proto icmp icmp type 3 icmp code 4 counter drop +add rule inet ovn-kubernetes no-pmtud ip daddr @remote-node-ips-v4 meta l4proto icmp icmp type 3 icmp code 4 counter drop add chain inet ovn-kubernetes no-pmtud { type filter hook output priority 0 ; comment "Block egress needs frag/packet too big to remote k8s nodes" ; } -add set inet ovn-kubernetes no-pmtud-remote-node-ips-v4 { type ipv4_addr ; comment "Block egress ICMP needs frag to remote Kubernetes nodes" ; } -add set inet ovn-kubernetes no-pmtud-remote-node-ips-v6 { type ipv6_addr ; comment "Block egress ICMPv6 packet too big to remote Kubernetes nodes" ; } +add set inet ovn-kubernetes remote-node-ips-v4 { type ipv4_addr ; comment "Block egress ICMP needs frag to remote Kubernetes nodes" ; } +add set inet ovn-kubernetes remote-node-ips-v6 { type ipv6_addr ; comment "Block egress ICMPv6 packet too big to remote Kubernetes nodes" ; } ` const v6PMTUDNFTRules = ` add table inet ovn-kubernetes -add rule inet ovn-kubernetes no-pmtud meta l4proto icmpv6 icmpv6 type 2 icmpv6 code 0 ip6 daddr 
@no-pmtud-remote-node-ips-v6 counter drop +add rule inet ovn-kubernetes no-pmtud meta l4proto icmpv6 icmpv6 type 2 icmpv6 code 0 ip6 daddr @remote-node-ips-v6 counter drop add chain inet ovn-kubernetes no-pmtud { type filter hook output priority 0 ; comment "Block egress needs frag/packet too big to remote k8s nodes" ; } -add set inet ovn-kubernetes no-pmtud-remote-node-ips-v4 { type ipv4_addr ; comment "Block egress ICMP needs frag to remote Kubernetes nodes" ; } -add set inet ovn-kubernetes no-pmtud-remote-node-ips-v6 { type ipv6_addr ; comment "Block egress ICMPv6 packet too big to remote Kubernetes nodes" ; } +add set inet ovn-kubernetes remote-node-ips-v4 { type ipv4_addr ; comment "Block egress ICMP needs frag to remote Kubernetes nodes" ; } +add set inet ovn-kubernetes remote-node-ips-v6 { type ipv6_addr ; comment "Block egress ICMPv6 packet too big to remote Kubernetes nodes" ; } ` var _ = Describe("Node", func() { @@ -755,6 +755,9 @@ var _ = Describe("Node", func() { node := corev1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: nodeName, + Annotations: map[string]string{ + util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", nodeIP+"/24"), + }, }, Status: corev1.NodeStatus{ Addresses: []corev1.NodeAddress{ @@ -769,6 +772,9 @@ var _ = Describe("Node", func() { otherNode := corev1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: remoteNodeName, + Annotations: map[string]string{ + util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", otherNodeIP+"/24"), + }, }, Status: corev1.NodeStatus{ Addresses: []corev1.NodeAddress{ @@ -806,7 +812,7 @@ var _ = Describe("Node", func() { cnnci := NewCommonNodeNetworkControllerInfo(kubeFakeClient, fakeClient.AdminPolicyRouteClient, wf, nil, nodeName, routeManager) nc = newDefaultNodeNetworkController(cnnci, stop, wg, routeManager, nil) nc.initRetryFrameworkForNode() - err = setupPMTUDNFTSets() + err = setupRemoteNodeNFTSets() Expect(err).NotTo(HaveOccurred()) err = setupPMTUDNFTChain() Expect(err).NotTo(HaveOccurred()) @@ -830,7 +836,7 @@ var _ = Describe("Node", func() { err = nc.WatchNodes() Expect(err).NotTo(HaveOccurred()) nftRules := v4PMTUDNFTRules + ` -add element inet ovn-kubernetes no-pmtud-remote-node-ips-v4 { 169.254.254.61 } +add element inet ovn-kubernetes remote-node-ips-v4 { 169.254.254.61 } ` err = nodenft.MatchNFTRules(nftRules, nft.Dump()) Expect(err).NotTo(HaveOccurred()) @@ -860,6 +866,9 @@ add element inet ovn-kubernetes no-pmtud-remote-node-ips-v4 { 169.254.254.61 } node := corev1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: nodeName, + Annotations: map[string]string{ + util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", nodeIP+"/24"), + }, }, Status: corev1.NodeStatus{ Addresses: []corev1.NodeAddress{ @@ -874,6 +883,9 @@ add element inet ovn-kubernetes no-pmtud-remote-node-ips-v4 { 169.254.254.61 } otherNode := corev1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: remoteNodeName, + Annotations: map[string]string{ + util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", otherSubnetNodeIP+"/24"), + }, }, Status: corev1.NodeStatus{ Addresses: []corev1.NodeAddress{ @@ -911,7 +923,7 @@ add element inet ovn-kubernetes no-pmtud-remote-node-ips-v4 { 169.254.254.61 } cnnci := NewCommonNodeNetworkControllerInfo(kubeFakeClient, fakeClient.AdminPolicyRouteClient, wf, nil, nodeName, routeManager) nc = newDefaultNodeNetworkController(cnnci, stop, wg, routeManager, nil) nc.initRetryFrameworkForNode() - err = setupPMTUDNFTSets() + err = setupRemoteNodeNFTSets() Expect(err).NotTo(HaveOccurred()) err = setupPMTUDNFTChain() Expect(err).NotTo(HaveOccurred()) @@ -935,7 +947,7 @@ add 
element inet ovn-kubernetes no-pmtud-remote-node-ips-v4 { 169.254.254.61 } err = nc.WatchNodes() Expect(err).NotTo(HaveOccurred()) nftRules := v4PMTUDNFTRules + ` -add element inet ovn-kubernetes no-pmtud-remote-node-ips-v4 { 169.254.253.61 } +add element inet ovn-kubernetes remote-node-ips-v4 { 169.254.253.61 } ` err = nodenft.MatchNFTRules(nftRules, nft.Dump()) Expect(err).NotTo(HaveOccurred()) @@ -1007,6 +1019,9 @@ add element inet ovn-kubernetes no-pmtud-remote-node-ips-v4 { 169.254.253.61 } node := corev1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: nodeName, + Annotations: map[string]string{ + util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", nodeIP+"/64"), + }, }, Status: corev1.NodeStatus{ Addresses: []corev1.NodeAddress{ @@ -1021,6 +1036,9 @@ add element inet ovn-kubernetes no-pmtud-remote-node-ips-v4 { 169.254.253.61 } otherNode := corev1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: remoteNodeName, + Annotations: map[string]string{ + util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", otherNodeIP+"/64"), + }, }, Status: corev1.NodeStatus{ Addresses: []corev1.NodeAddress{ @@ -1058,7 +1076,7 @@ add element inet ovn-kubernetes no-pmtud-remote-node-ips-v4 { 169.254.253.61 } cnnci := NewCommonNodeNetworkControllerInfo(kubeFakeClient, fakeClient.AdminPolicyRouteClient, wf, nil, nodeName, routeManager) nc = newDefaultNodeNetworkController(cnnci, stop, wg, routeManager, nil) nc.initRetryFrameworkForNode() - err = setupPMTUDNFTSets() + err = setupRemoteNodeNFTSets() Expect(err).NotTo(HaveOccurred()) err = setupPMTUDNFTChain() Expect(err).NotTo(HaveOccurred()) @@ -1082,7 +1100,7 @@ add element inet ovn-kubernetes no-pmtud-remote-node-ips-v4 { 169.254.253.61 } err = nc.WatchNodes() Expect(err).NotTo(HaveOccurred()) nftRules := v6PMTUDNFTRules + ` -add element inet ovn-kubernetes no-pmtud-remote-node-ips-v6 { 2001:db8:1::4 } +add element inet ovn-kubernetes remote-node-ips-v6 { 2001:db8:1::4 } ` err = nodenft.MatchNFTRules(nftRules, nft.Dump()) Expect(err).NotTo(HaveOccurred()) @@ -1111,6 +1129,9 @@ add element inet ovn-kubernetes no-pmtud-remote-node-ips-v6 { 2001:db8:1::4 } node := corev1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: nodeName, + Annotations: map[string]string{ + util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", nodeIP+"/64"), + }, }, Status: corev1.NodeStatus{ Addresses: []corev1.NodeAddress{ @@ -1125,6 +1146,9 @@ add element inet ovn-kubernetes no-pmtud-remote-node-ips-v6 { 2001:db8:1::4 } otherNode := corev1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: remoteNodeName, + Annotations: map[string]string{ + util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", otherSubnetNodeIP+"/64"), + }, }, Status: corev1.NodeStatus{ Addresses: []corev1.NodeAddress{ @@ -1162,7 +1186,7 @@ add element inet ovn-kubernetes no-pmtud-remote-node-ips-v6 { 2001:db8:1::4 } cnnci := NewCommonNodeNetworkControllerInfo(kubeFakeClient, fakeClient.AdminPolicyRouteClient, wf, nil, nodeName, routeManager) nc = newDefaultNodeNetworkController(cnnci, stop, wg, routeManager, nil) nc.initRetryFrameworkForNode() - err = setupPMTUDNFTSets() + err = setupRemoteNodeNFTSets() Expect(err).NotTo(HaveOccurred()) err = setupPMTUDNFTChain() Expect(err).NotTo(HaveOccurred()) @@ -1186,7 +1210,7 @@ add element inet ovn-kubernetes no-pmtud-remote-node-ips-v6 { 2001:db8:1::4 } err = nc.WatchNodes() Expect(err).NotTo(HaveOccurred()) nftRules := v6PMTUDNFTRules + ` -add element inet ovn-kubernetes no-pmtud-remote-node-ips-v6 { 2002:db8:1::4 } +add element inet ovn-kubernetes remote-node-ips-v6 { 2002:db8:1::4 } ` err = nodenft.MatchNFTRules(nftRules, 
nft.Dump()) Expect(err).NotTo(HaveOccurred()) @@ -1323,7 +1347,7 @@ add element inet ovn-kubernetes no-pmtud-remote-node-ips-v6 { 2002:db8:1::4 } cnnci := NewCommonNodeNetworkControllerInfo(kubeFakeClient, fakeClient.AdminPolicyRouteClient, wf, nil, nodeName, routeManager) nc = newDefaultNodeNetworkController(cnnci, stop, wg, routeManager, nil) nc.initRetryFrameworkForNode() - err = setupPMTUDNFTSets() + err = setupRemoteNodeNFTSets() Expect(err).NotTo(HaveOccurred()) err = setupPMTUDNFTChain() Expect(err).NotTo(HaveOccurred()) @@ -1444,7 +1468,7 @@ add element inet ovn-kubernetes no-pmtud-remote-node-ips-v6 { 2002:db8:1::4 } cnnci := NewCommonNodeNetworkControllerInfo(kubeFakeClient, fakeClient.AdminPolicyRouteClient, wf, nil, nodeName, routeManager) nc = newDefaultNodeNetworkController(cnnci, stop, wg, routeManager, nil) nc.initRetryFrameworkForNode() - err = setupPMTUDNFTSets() + err = setupRemoteNodeNFTSets() Expect(err).NotTo(HaveOccurred()) err = setupPMTUDNFTChain() Expect(err).NotTo(HaveOccurred()) diff --git a/go-controller/pkg/node/gateway.go b/go-controller/pkg/node/gateway.go index 9b43fc95a5..fa812377e7 100644 --- a/go-controller/pkg/node/gateway.go +++ b/go-controller/pkg/node/gateway.go @@ -521,9 +521,9 @@ func (g *gateway) addAllServices() []error { func (g *gateway) updateSNATRules() error { subnets := util.IPsToNetworkIPs(g.nodeIPManager.mgmtPort.GetAddresses()...) - if g.GetDefaultPodNetworkAdvertised() || config.Gateway.Mode != config.GatewayModeLocal { - return delLocalGatewayPodSubnetNATRules(subnets...) + if config.Gateway.Mode != config.GatewayModeLocal { + return delLocalGatewayPodSubnetNFTRules() } - return addLocalGatewayPodSubnetNATRules(subnets...) + return addOrUpdateLocalGatewayPodSubnetNFTRules(g.GetDefaultPodNetworkAdvertised(), subnets...) 
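// Illustrative note: the local-gateway pod-subnet masquerade moves from iptables nat/POSTROUTING into nftables
// chains. A rough before/after, using the addresses from the gateway unit test below:
//
//	iptables (removed): -s 10.1.1.0/24 -j MASQUERADE
//	nftables (added):   add rule inet ovn-kubernetes ovn-kube-pod-subnet-masq ip saddr 10.1.1.0/24 masquerade
//
// Whether the pod-subnet rule is kept when the default pod network is advertised is now decided inside
// addOrUpdateLocalGatewayPodSubnetNFTRules (not shown in this hunk), rather than by deleting the rules outright
// as the previous code did.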
} diff --git a/go-controller/pkg/node/gateway_init_linux_test.go b/go-controller/pkg/node/gateway_init_linux_test.go index 79886dbf38..7bbfdbce98 100644 --- a/go-controller/pkg/node/gateway_init_linux_test.go +++ b/go-controller/pkg/node/gateway_init_linux_test.go @@ -80,6 +80,17 @@ add chain inet ovn-kubernetes udn-service-prerouting { type filter hook prerouti add rule inet ovn-kubernetes udn-service-prerouting iifname != %s jump udn-service-mark add chain inet ovn-kubernetes udn-service-output { type filter hook output priority -150 ; comment "UDN services packet mark - Output" ; } add rule inet ovn-kubernetes udn-service-output jump udn-service-mark +add chain inet ovn-kubernetes ovn-kube-udn-masq { comment "OVN UDN masquerade" ; } +add rule inet ovn-kubernetes ovn-kube-udn-masq ip saddr != 169.254.169.0/29 ip daddr != 172.16.1.0/24 ip saddr 169.254.169.0/24 masquerade +add rule inet ovn-kubernetes ovn-kube-local-gw-masq jump ovn-kube-udn-masq +` + +const baseLGWNFTablesRules = ` +add rule inet ovn-kubernetes ovn-kube-local-gw-masq ip saddr 169.254.169.1 masquerade +add chain inet ovn-kubernetes ovn-kube-local-gw-masq { type nat hook postrouting priority 101 ; comment "OVN local gateway masquerade" ; } +add rule inet ovn-kubernetes ovn-kube-local-gw-masq jump ovn-kube-pod-subnet-masq +add rule inet ovn-kubernetes ovn-kube-pod-subnet-masq ip saddr 10.1.1.0/24 masquerade +add chain inet ovn-kubernetes ovn-kube-pod-subnet-masq ` func getBaseNFTRules(mgmtPort string) string { @@ -90,6 +101,10 @@ func getBaseNFTRules(mgmtPort string) string { return ret } +func getBaseLGWNFTablesRules(mgmtPort string) string { + return getBaseNFTRules(mgmtPort) + baseLGWNFTablesRules +} + func shareGatewayInterfaceTest(app *cli.App, testNS ns.NetNS, eth0Name, eth0MAC, eth0GWIP, eth0CIDR string, gatewayVLANID uint, l netlink.Link, hwOffload, setNodeIP bool) { const mtu string = "1234" @@ -1358,10 +1373,6 @@ OFPT_GET_CONFIG_REPLY (xid=0x4): frags=normal miss_send_len=0` "OVN-KUBE-EXTERNALIP": []string{ fmt.Sprintf("-p %s -d %s --dport %v -j DNAT --to-destination %s:%v", service.Spec.Ports[0].Protocol, externalIP, service.Spec.Ports[0].Port, service.Spec.ClusterIP, service.Spec.Ports[0].Port), }, - "POSTROUTING": []string{ - "-s 169.254.169.1 -j MASQUERADE", - "-s 10.1.1.0/24 -j MASQUERADE", - }, "OVN-KUBE-ETP": []string{}, "OVN-KUBE-ITP": []string{}, }, @@ -1421,7 +1432,7 @@ OFPT_GET_CONFIG_REPLY (xid=0x4): frags=normal miss_send_len=0` err = f6.MatchState(expectedTables, nil) Expect(err).NotTo(HaveOccurred()) - expectedNFT := getBaseNFTRules(types.K8sMgmtIntfName) + expectedNFT := getBaseLGWNFTablesRules(types.K8sMgmtIntfName) err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) Expect(err).NotTo(HaveOccurred()) diff --git a/go-controller/pkg/node/gateway_iptables.go b/go-controller/pkg/node/gateway_iptables.go index e9b6b12387..90bffbe91f 100644 --- a/go-controller/pkg/node/gateway_iptables.go +++ b/go-controller/pkg/node/gateway_iptables.go @@ -21,11 +21,10 @@ import ( ) const ( - iptableNodePortChain = "OVN-KUBE-NODEPORT" // called from nat-PREROUTING and nat-OUTPUT - iptableExternalIPChain = "OVN-KUBE-EXTERNALIP" // called from nat-PREROUTING and nat-OUTPUT - iptableETPChain = "OVN-KUBE-ETP" // called from nat-PREROUTING only - iptableITPChain = "OVN-KUBE-ITP" // called from mangle-OUTPUT and nat-OUTPUT - iptableUDNMasqueradeChain = "OVN-KUBE-UDN-MASQUERADE" // called from nat-POSTROUTING + iptableNodePortChain = "OVN-KUBE-NODEPORT" // called from nat-PREROUTING and nat-OUTPUT + iptableExternalIPChain = 
"OVN-KUBE-EXTERNALIP" // called from nat-PREROUTING and nat-OUTPUT + iptableETPChain = "OVN-KUBE-ETP" // called from nat-PREROUTING only + iptableITPChain = "OVN-KUBE-ITP" // called from mangle-OUTPUT and nat-OUTPUT ) func clusterIPTablesProtocols() []iptables.Protocol { @@ -69,29 +68,11 @@ func restoreIptRulesFiltered(rules []nodeipt.Rule, filter map[string]map[string] return nodeipt.RestoreRulesFiltered(rules, filter) } -// appendIptRules adds the provided rules in an append fashion -// i.e each rule gets added at the last position in the chain -func appendIptRules(rules []nodeipt.Rule) error { - return nodeipt.AddRules(rules, true) -} - // deleteIptRules removes provided rules from the chain func deleteIptRules(rules []nodeipt.Rule) error { return nodeipt.DelRules(rules) } -// ensureChain ensures that a chain exists within a table -func ensureChain(table, chain string) error { - for _, proto := range clusterIPTablesProtocols() { - ipt, err := util.GetIPTablesHelper(proto) - if err != nil { - return fmt.Errorf("failed to get IPTables helper to add UDN chain: %v", err) - } - addChaintoTable(ipt, table, chain) - } - return nil -} - func getGatewayInitRules(chain string, proto iptables.Protocol) []nodeipt.Rule { iptRules := []nodeipt.Rule{} if chain == iptableITPChain { @@ -403,123 +384,8 @@ func getLocalGatewayFilterRules(ifname string, cidr *net.IPNet) []nodeipt.Rule { } } -func getLocalGatewayPodSubnetNATRules(cidr *net.IPNet) []nodeipt.Rule { - protocol := getIPTablesProtocol(cidr.IP.String()) - return []nodeipt.Rule{ - { - Table: "nat", - Chain: "POSTROUTING", - Args: []string{ - "-s", cidr.String(), - "-j", "MASQUERADE", - }, - Protocol: protocol, - }, - } -} - -// getUDNMasqueradeRules is only called for local-gateway-mode -func getUDNMasqueradeRules(protocol iptables.Protocol) []nodeipt.Rule { - // the following rules are actively used only for the UDN Feature: - // -A POSTROUTING -j OVN-KUBE-UDN-MASQUERADE - // -A OVN-KUBE-UDN-MASQUERADE -s 169.254.0.0/29 -j RETURN - // -A OVN-KUBE-UDN-MASQUERADE -d 10.96.0.0/16 -j RETURN - // -A OVN-KUBE-UDN-MASQUERADE -s 169.254.0.0/17 -j MASQUERADE - // NOTE: Ordering is important here, the RETURN must come before - // the MASQUERADE rule. Please don't change the ordering. - srcUDNMasqueradePrefix := config.Gateway.V4MasqueradeSubnet - ipFamily := utilnet.IPv4 - if protocol == iptables.ProtocolIPv6 { - srcUDNMasqueradePrefix = config.Gateway.V6MasqueradeSubnet - ipFamily = utilnet.IPv6 - } - // defaultNetworkReservedMasqueradePrefix contains the first 6 IPs in the - // masquerade range that shouldn't be masqueraded. Hence it's always 3 bits (8 - // IPs) wide, regardless of IP family. 
- _, ipnet, _ := net.ParseCIDR(srcUDNMasqueradePrefix) - _, len := ipnet.Mask.Size() - defaultNetworkReservedMasqueradePrefix := fmt.Sprintf("%s/%d", ipnet.IP.String(), len-3) - - rules := []nodeipt.Rule{ - { - Table: "nat", - Chain: "POSTROUTING", - Args: []string{"-j", iptableUDNMasqueradeChain}, // NOTE: AddRules will take care of creating the chain - Protocol: protocol, - }, - { - Table: "nat", - Chain: iptableUDNMasqueradeChain, - Args: []string{ - "-s", defaultNetworkReservedMasqueradePrefix, - "-j", "RETURN", - }, - Protocol: protocol, - }, - } - for _, svcCIDR := range config.Kubernetes.ServiceCIDRs { - if utilnet.IPFamilyOfCIDR(svcCIDR) != ipFamily { - continue - } - rules = append(rules, - nodeipt.Rule{ - Table: "nat", - Chain: iptableUDNMasqueradeChain, - Args: []string{ - "-d", svcCIDR.String(), - "-j", "RETURN", - }, - Protocol: protocol, - }, - ) - } - rules = append(rules, - nodeipt.Rule{ - Table: "nat", - Chain: iptableUDNMasqueradeChain, - Args: []string{ - "-s", srcUDNMasqueradePrefix, - "-j", "MASQUERADE", - }, - Protocol: protocol, - }, - ) - return rules -} - -func getLocalGatewayNATRules(cidr *net.IPNet) []nodeipt.Rule { - // Allow packets to/from the gateway interface in case defaults deny - protocol := getIPTablesProtocol(cidr.IP.String()) - masqueradeIP := config.Gateway.MasqueradeIPs.V4OVNMasqueradeIP - if protocol == iptables.ProtocolIPv6 { - masqueradeIP = config.Gateway.MasqueradeIPs.V6OVNMasqueradeIP - } - rules := append( - []nodeipt.Rule{ - { - Table: "nat", - Chain: "POSTROUTING", - Args: []string{ - "-s", masqueradeIP.String(), - "-j", "MASQUERADE", - }, - Protocol: protocol, - }, - }, - getLocalGatewayPodSubnetNATRules(cidr)..., - ) - - // FIXME(tssurya): If the feature is disabled we should be removing - // these rules - if util.IsNetworkSegmentationSupportEnabled() { - rules = append(rules, getUDNMasqueradeRules(protocol)...) - } - - return rules -} - -// initLocalGatewayNATRules sets up iptables rules for interfaces -func initLocalGatewayNATRules(ifname string, cidr *net.IPNet) error { +// initLocalGatewayIPTFilterRules sets up iptables rules for interfaces +func initLocalGatewayIPTFilterRules(ifname string, cidr *net.IPNet) error { // Insert the filter table rules because they need to be evaluated BEFORE the DROP rules // we have for forwarding. DO NOT change the ordering; specially important // during SGW->LGW rollouts and restarts. @@ -527,25 +393,8 @@ func initLocalGatewayNATRules(ifname string, cidr *net.IPNet) error { if err != nil { return fmt.Errorf("unable to insert forwarding rules %v", err) } - // append the masquerade rules in POSTROUTING table since that needs to be - // evaluated last. - return appendIptRules(getLocalGatewayNATRules(cidr)) -} - -func addLocalGatewayPodSubnetNATRules(cidrs ...*net.IPNet) error { - var rules []nodeipt.Rule - for _, cidr := range cidrs { - rules = append(rules, getLocalGatewayPodSubnetNATRules(cidr)...) - } - return appendIptRules(rules) -} - -func delLocalGatewayPodSubnetNATRules(cidrs ...*net.IPNet) error { - var rules []nodeipt.Rule - for _, cidr := range cidrs { - rules = append(rules, getLocalGatewayPodSubnetNATRules(cidr)...) 
- } - return deleteIptRules(rules) + // NOTE: nftables masquerade rules are now handled separately in initLocalGatewayNFTNATRules + return nil } func addChaintoTable(ipt util.IPTablesHelper, tableName, chain string) { diff --git a/go-controller/pkg/node/gateway_localnet.go b/go-controller/pkg/node/gateway_localnet.go index e0cc822844..6b8ed9aa0b 100644 --- a/go-controller/pkg/node/gateway_localnet.go +++ b/go-controller/pkg/node/gateway_localnet.go @@ -17,11 +17,11 @@ import ( func initLocalGateway(hostSubnets []*net.IPNet, mgmtPort managementport.Interface) error { klog.Info("Adding iptables masquerading rules for new local gateway") - if util.IsNetworkSegmentationSupportEnabled() { - if err := ensureChain("nat", iptableUDNMasqueradeChain); err != nil { - return fmt.Errorf("failed to ensure chain %s in NAT table: %w", iptableUDNMasqueradeChain, err) - } - } + + var allCIDRs []*net.IPNet + ifName := mgmtPort.GetInterfaceName() + + // First pass: collect all CIDRs and setup iptables filter rules per interface for _, hostSubnet := range hostSubnets { // local gateway mode uses mp0 as default path for all ingress traffic into OVN nextHop, err := util.MatchFirstIPNetFamily(utilnet.IsIPv6CIDR(hostSubnet), mgmtPort.GetAddresses()) @@ -32,11 +32,21 @@ func initLocalGateway(hostSubnets []*net.IPNet, mgmtPort managementport.Interfac // add iptables masquerading for mp0 to exit the host for egress cidr := nextHop.IP.Mask(nextHop.Mask) cidrNet := &net.IPNet{IP: cidr, Mask: nextHop.Mask} - ifName := mgmtPort.GetInterfaceName() - if err := initLocalGatewayNATRules(ifName, cidrNet); err != nil { + allCIDRs = append(allCIDRs, cidrNet) + + // Setup iptables filter rules for this interface/CIDR + if err := initLocalGatewayIPTFilterRules(ifName, cidrNet); err != nil { return fmt.Errorf("failed to add local NAT rules for: %s, err: %v", ifName, err) } } + + // setup nftables masquerade rules for all CIDRs (v4, v6 or dualstack) + if len(allCIDRs) > 0 { + if err := initLocalGatewayNFTNATRules(allCIDRs...); err != nil { + return fmt.Errorf("failed to setup nftables masquerade rules: %w", err) + } + } + return nil } diff --git a/go-controller/pkg/node/gateway_nftables.go b/go-controller/pkg/node/gateway_nftables.go index 842bb417d1..b38f2baebb 100644 --- a/go-controller/pkg/node/gateway_nftables.go +++ b/go-controller/pkg/node/gateway_nftables.go @@ -6,12 +6,14 @@ package node import ( "context" "fmt" + "net" "strings" corev1 "k8s.io/api/core/v1" utilnet "k8s.io/utils/net" "sigs.k8s.io/knftables" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/bridgeconfig" nodenft "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/nftables" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" @@ -27,6 +29,13 @@ import ( // use an "accept" rule to override a later "drop" rule), then those rules will need to // either both be iptables or both be nftables. +// nftables chain names +const ( + nftablesLocalGatewayMasqChain = "ovn-kube-local-gw-masq" + nftablesPodSubnetMasqChain = "ovn-kube-pod-subnet-masq" + nftablesUDNMasqChain = "ovn-kube-udn-masq" +) + // getNoSNATNodePortRules returns elements to add to the "mgmtport-no-snat-nodeports" // set to prevent SNAT of sourceIP when passing through the management port, for an // `externalTrafficPolicy: Local` service with NodePorts. 
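Illustrative sketch only, not part of the patch: assuming a local-gateway node with pod subnets 10.244.1.0/24 and fd00:10:244:1::/64, the OVN masquerade IPs 169.254.0.1 and fd69::1 used in the doc comments below, and an advertised default pod network, the chains introduced by the following gateway_nftables.go hunk together with addOrUpdateLocalGatewayPodSubnetNFTRules(true, ...) could be expected to dump roughly as:

add chain inet ovn-kubernetes ovn-kube-local-gw-masq { type nat hook postrouting priority 101 ; comment "OVN local gateway masquerade" ; }
add rule inet ovn-kubernetes ovn-kube-local-gw-masq ip saddr 169.254.0.1 masquerade
add rule inet ovn-kubernetes ovn-kube-local-gw-masq ip6 saddr fd69::1 masquerade
add rule inet ovn-kubernetes ovn-kube-local-gw-masq jump ovn-kube-pod-subnet-masq
add chain inet ovn-kubernetes ovn-kube-pod-subnet-masq
add rule inet ovn-kubernetes ovn-kube-pod-subnet-masq ip saddr 10.244.1.0/24 ip daddr @remote-node-ips-v4 masquerade
add rule inet ovn-kubernetes ovn-kube-pod-subnet-masq ip6 saddr fd00:10:244:1::/64 ip6 daddr @remote-node-ips-v6 masquerade

with the ovn-kube-udn-masq chain and its jump added on top when network segmentation is enabled.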
@@ -186,3 +195,320 @@ func getUDNNFTRules(service *corev1.Service, netConfig *bridgeconfig.BridgeUDNCo
 	}
 	return rules
 }
+
+// getLocalGatewayPodSubnetMasqueradeNFTRule creates a rule for masquerading traffic from the pod subnet CIDR
+// on the local gateway node, in a separate chain which is then called from the local gateway masquerade chain.
+//
+// chain ovn-kube-pod-subnet-masq {
+//	ip saddr 10.244.0.0/24 masquerade
+//	ip6 saddr fd00:10:244:1::/64 masquerade
+// }
+//
+// If isAdvertisedNetwork is true, masquerade only when the destination matches remote node IPs.
+// Rules look like:
+//	ip saddr 10.244.0.0/24 ip daddr @remote-node-ips-v4 masquerade
+//	ip6 saddr fd00:10:244:1::/64 ip6 daddr @remote-node-ips-v6 masquerade
+func getLocalGatewayPodSubnetMasqueradeNFTRule(cidr *net.IPNet, isAdvertisedNetwork bool) (*knftables.Rule, error) {
+	// Create the rule for masquerading traffic from the CIDR
+	var ipPrefix string
+	var remoteNodeSetName string
+	if utilnet.IsIPv6CIDR(cidr) {
+		ipPrefix = "ip6"
+		remoteNodeSetName = types.NFTRemoteNodeIPsv6
+	} else {
+		ipPrefix = "ip"
+		remoteNodeSetName = types.NFTRemoteNodeIPsv4
+	}
+
+	// If the network is advertised, only masquerade if the destination is a remote node IP
+	var optionalDestRules []string
+	if isAdvertisedNetwork {
+		optionalDestRules = []string{ipPrefix, "daddr", "@", remoteNodeSetName}
+	}
+	rule := &knftables.Rule{
+		Rule: knftables.Concat(
+			ipPrefix, "saddr", cidr,
+			optionalDestRules,
+			"masquerade",
+		),
+		Chain: nftablesPodSubnetMasqChain,
+	}
+
+	return rule, nil
+}
+
+// getLocalGatewayNATNFTRules returns the nftables rules for local gateway NAT, including the masquerade IP rule,
+// pod subnet rules, and UDN masquerade rules (if network segmentation is enabled).
+// This function supports dual-stack by accepting multiple CIDRs and generating rules for all IP families.
+//
+// chain ovn-kube-local-gw-masq {
+//	comment "OVN local gateway masquerade"
+//	type nat hook postrouting priority srcnat; policy accept;
+//	ip saddr 169.254.0.1 masquerade
+//	ip6 saddr fd69::1 masquerade
+//	jump ovn-kube-pod-subnet-masq
+//	jump ovn-kube-udn-masq
+// }
+func getLocalGatewayNATNFTRules(cidrs ...*net.IPNet) ([]*knftables.Rule, error) {
+	var rules []*knftables.Rule
+
+	// Process each CIDR to support dual-stack
+	for _, cidr := range cidrs {
+		// Determine IP version and masquerade IP
+		isIPv6 := utilnet.IsIPv6CIDR(cidr)
+		var masqueradeIP net.IP
+		var ipPrefix string
+		if isIPv6 {
+			masqueradeIP = config.Gateway.MasqueradeIPs.V6OVNMasqueradeIP
+			ipPrefix = "ip6"
+		} else {
+			masqueradeIP = config.Gateway.MasqueradeIPs.V4OVNMasqueradeIP
+			ipPrefix = "ip"
+		}
+
+		// Rule 1: Masquerade IP rule for the main chain
+		masqRule := &knftables.Rule{
+			Chain: nftablesLocalGatewayMasqChain,
+			Rule: knftables.Concat(
+				ipPrefix, "saddr", masqueradeIP,
+				"masquerade",
+			),
+		}
+		rules = append(rules, masqRule)
+
+		// Rule 2: Pod subnet NAT rule for the pod subnet chain
+		podSubnetRule, err := getLocalGatewayPodSubnetMasqueradeNFTRule(cidr, false)
+		if err != nil {
+			return nil, fmt.Errorf("failed to create pod subnet masquerade rule: %w", err)
+		}
+		rules = append(rules, podSubnetRule)
+	}
+
+	// Rule 3: UDN masquerade rules (if network segmentation is enabled)
+	if util.IsNetworkSegmentationSupportEnabled() {
+		if config.IPv4Mode {
+			udnRules, err := getUDNMasqueradeNFTRules(utilnet.IPv4)
+			if err != nil {
+				return nil, fmt.Errorf("failed to create IPv4 UDN masquerade rules: %w", err)
+			}
+			rules = append(rules, udnRules...)
+		}
+		if config.IPv6Mode {
+			udnRules, err := getUDNMasqueradeNFTRules(utilnet.IPv6)
+			if err != nil {
+				return nil, fmt.Errorf("failed to create IPv6 UDN masquerade rules: %w", err)
+			}
+			rules = append(rules, udnRules...)
+		}
+	}
+
+	return rules, nil
+}
+
+// getUDNMasqueradeNFTRules returns the nftables rules for UDN masquerade.
+// Chain creation is handled separately by initLocalGatewayNFTNATRules.
+//
+// chain ovn-kube-udn-masq {
+//	comment "OVN UDN masquerade"
+//	ip saddr != 169.254.0.0/29 ip daddr != 10.96.0.0/16 ip saddr 169.254.0.0/17 masquerade
+//	ip6 saddr != fd69::/125 ip6 daddr != fd00:10:96::/112 ip6 saddr fd69::/112 masquerade
+// }
+func getUDNMasqueradeNFTRules(ipFamily utilnet.IPFamily) ([]*knftables.Rule, error) {
+	var rules []*knftables.Rule
+
+	// Determine subnet and IP family
+	srcUDNMasqueradePrefix := config.Gateway.V4MasqueradeSubnet
+	ipPrefix := "ip"
+	if ipFamily == utilnet.IPv6 {
+		srcUDNMasqueradePrefix = config.Gateway.V6MasqueradeSubnet
+		ipPrefix = "ip6"
+	}
+
+	// Calculate the reserved masquerade prefix (the first 8 IPs): prefixLen is the
+	// address length in bits (32 or 128), so prefixLen-3 yields a /29 or /125.
+	_, ipnet, err := net.ParseCIDR(srcUDNMasqueradePrefix)
+	if err != nil {
+		return nil, fmt.Errorf("failed to parse UDN masquerade subnet: %w", err)
+	}
+	_, prefixLen := ipnet.Mask.Size()
+	defaultNetworkReservedMasqueradePrefix := fmt.Sprintf("%s/%d", ipnet.IP.String(), prefixLen-3)
+
+	// Rule: masquerade traffic sourced from the UDN masquerade subnet, excluding the
+	// reserved default-network masquerade prefix and the service CIDRs
+
+	for _, svcCIDR := range config.Kubernetes.ServiceCIDRs {
+		if utilnet.IPFamilyOfCIDR(svcCIDR) != ipFamily {
+			continue
+		}
+		masqueradeRule := &knftables.Rule{
+			Chain: nftablesUDNMasqChain,
+			Rule: knftables.Concat(
+				ipPrefix, "saddr", "!=", defaultNetworkReservedMasqueradePrefix, // this guarantees we don't SNAT default network masqueradeIPs
+				ipPrefix, "daddr", "!=", svcCIDR, // this guarantees we don't SNAT service traffic
+				ipPrefix, "saddr", srcUDNMasqueradePrefix, // this guarantees we SNAT all UDN MasqueradeIPs traffic leaving the node
+				"masquerade",
+			),
+		}
+		rules = append(rules, masqueradeRule)
+	}
+
+	return rules, nil
+}
+
+// initLocalGatewayNFTNATRules sets up nftables rules for local gateway NAT functionality.
+// This function supports dual-stack by accepting multiple CIDRs and generating rules for all IP families.
+func initLocalGatewayNFTNATRules(cidrs ...*net.IPNet) error {
+	nft, err := nodenft.GetNFTablesHelper()
+	if err != nil {
+		return fmt.Errorf("failed to get nftables helper: %w", err)
+	}
+
+	// Create transaction and apply all chains and rules
+	tx := nft.NewTransaction()
+
+	// Create main local gateway masquerade chain.
+	// Use priority 101 instead of the default knftables.SNATPriority (100) to ensure
+	// iptables egress IP rules in the OVN-KUBE-EGRESS-IP-MULTI-NIC chain run first.
+	// This also ensures that, for egress services, the
+	//	chain egress-services { type nat hook postrouting priority srcnat; policy accept; }
+	// is evaluated before the local gateway masquerade chain.
+	localGwMasqChain := &knftables.Chain{
+		Name:     nftablesLocalGatewayMasqChain,
+		Comment:  knftables.PtrTo("OVN local gateway masquerade"),
+		Type:     knftables.PtrTo(knftables.NATType),
+		Hook:     knftables.PtrTo(knftables.PostroutingHook),
+		Priority: knftables.PtrTo(knftables.BaseChainPriority("101")),
+	}
+	tx.Add(localGwMasqChain)
+
+	// Create dedicated pod subnet masquerade chain
+	podSubnetMasqChain := &knftables.Chain{
+		Name: nftablesPodSubnetMasqChain,
+	}
+	tx.Add(podSubnetMasqChain)
+
+	// Create UDN masquerade chain only if network segmentation is enabled
+	var udnMasqChain *knftables.Chain
+	if util.IsNetworkSegmentationSupportEnabled() {
+		udnMasqChain = &knftables.Chain{
+			Name:    nftablesUDNMasqChain,
+			Comment: knftables.PtrTo("OVN UDN masquerade"),
+		}
+		tx.Add(udnMasqChain)
+	}
+
+	// Flush existing chains to ensure clean state
+	tx.Flush(localGwMasqChain)
+	tx.Flush(podSubnetMasqChain)
+	if util.IsNetworkSegmentationSupportEnabled() {
+		tx.Flush(udnMasqChain)
+	}
+
+	// Build the local gateway NAT rules
+	localGwRules, err := getLocalGatewayNATNFTRules(cidrs...)
+	if err != nil {
+		return fmt.Errorf("failed to get local gateway NAT rules: %w", err)
+	}
+
+	// Add the main local gateway NAT rules
+	for _, rule := range localGwRules {
+		tx.Add(rule)
+	}
+
+	// Add jump rule from main chain to pod subnet chain
+	jumpToPodSubnetRule := &knftables.Rule{
+		Chain: nftablesLocalGatewayMasqChain,
+		Rule: knftables.Concat(
+			"jump", nftablesPodSubnetMasqChain,
+		),
+	}
+	tx.Add(jumpToPodSubnetRule)
+
+	// Add jump rule to UDN chain only if network segmentation is enabled
+	if util.IsNetworkSegmentationSupportEnabled() {
+		jumpToUDNRule := &knftables.Rule{
+			Chain: nftablesLocalGatewayMasqChain,
+			Rule: knftables.Concat(
+				"jump", nftablesUDNMasqChain,
+			),
+		}
+		tx.Add(jumpToUDNRule)
+	}
+
+	err = nft.Run(context.TODO(), tx)
+	if err != nil {
+		return fmt.Errorf("failed to setup local gateway NAT nftables rules: %w", err)
+	}
+
+	return nil
+}
+
+// addOrUpdateLocalGatewayPodSubnetNFTRules adds nftables rules for pod subnet masquerading for multiple CIDRs.
+// These rules are added to the dedicated pod subnet masquerade chain.
+// If the rules already exist, they are updated.
+// If isAdvertisedNetwork is true, the masquerade rules also get a destination match
+// that matches the remote node IP set.
+func addOrUpdateLocalGatewayPodSubnetNFTRules(isAdvertisedNetwork bool, cidrs ...*net.IPNet) error {
+	nft, err := nodenft.GetNFTablesHelper()
+	if err != nil {
+		return fmt.Errorf("failed to get nftables helper: %w", err)
+	}
+
+	tx := nft.NewTransaction()
+
+	// Ensure the pod subnet chain exists
+	podSubnetChain := &knftables.Chain{
+		Name: nftablesPodSubnetMasqChain,
+	}
+	tx.Add(podSubnetChain)
+
+	// Flush the chain to remove all existing rules:
+	// if the network toggles between advertised and non-advertised, we need to flush the chain and re-add the correct rules
+	tx.Flush(podSubnetChain)
+
+	// Add the new rules for each CIDR
+	for _, cidr := range cidrs {
+		rule, err := getLocalGatewayPodSubnetMasqueradeNFTRule(cidr, isAdvertisedNetwork)
+		if err != nil {
+			return fmt.Errorf("failed to create nftables rules for CIDR %s: %w", cidr.String(), err)
+		}
+
+		// Add the rule
+		tx.Add(rule)
+	}
+
+	if err := nft.Run(context.TODO(), tx); err != nil {
+		return fmt.Errorf("failed to add pod subnet NAT rules: %w", err)
+	}
+
+	return nil
+}
+
+// delLocalGatewayPodSubnetNFTRules removes nftables rules for pod subnet masquerading for multiple CIDRs.
+// Since we use a separate chain, we can simply flush it to remove all pod subnet rules.
+func delLocalGatewayPodSubnetNFTRules() error {
+	nft, err := nodenft.GetNFTablesHelper()
+	if err != nil {
+		return fmt.Errorf("failed to get nftables helper: %w", err)
+	}
+
+	tx := nft.NewTransaction()
+
+	// In shared gateway mode this chain might not exist, if this is not a
+	// migration from local gateway mode. In that case, use the idiomatic way
+	// of adding the chain before trying to flush it; the knftables.IsNotFound()
+	// check on the transaction run below covers this as well.
+ tx.Add(&knftables.Chain{ + Name: nftablesPodSubnetMasqChain, + }) + + // Simply flush the dedicated pod subnet masquerade chain + // This removes all pod subnet masquerade rules at once + tx.Flush(&knftables.Chain{Name: nftablesPodSubnetMasqChain}) + + if err := nft.Run(context.TODO(), tx); err != nil && !knftables.IsNotFound(err) { + return fmt.Errorf("failed to delete pod subnet NAT rules: %w", err) + } + + return nil +} diff --git a/go-controller/pkg/node/gateway_shared_intf.go b/go-controller/pkg/node/gateway_shared_intf.go index bd83448ba4..35e409618b 100644 --- a/go-controller/pkg/node/gateway_shared_intf.go +++ b/go-controller/pkg/node/gateway_shared_intf.go @@ -368,7 +368,7 @@ func (npw *nodePortWatcher) updateServiceFlowCache(service *corev1.Service, netI var ofPorts []string // don't get the ports unless we need to as it is a costly operation if (len(extParsedIPs) > 0 || len(ingParsedIPs) > 0) && add { - ofPorts, err = util.GetOpenFlowPorts(npw.gwBridge.GetGatewayIface(), false) + ofPorts, err = util.GetOpenFlowPorts(npw.gwBridge.GetBridgeName(), false) if err != nil { // in the odd case that getting all ports from the bridge should not work, // simply output to LOCAL (this should work well in the vast majority of cases, anyway) diff --git a/go-controller/pkg/node/gateway_udn.go b/go-controller/pkg/node/gateway_udn.go index 026ecd94fc..32512d8287 100644 --- a/go-controller/pkg/node/gateway_udn.go +++ b/go-controller/pkg/node/gateway_udn.go @@ -89,6 +89,10 @@ type UserDefinedNetworkGateway struct { // gwInterfaceIndex holds the link index of gateway interface gwInterfaceIndex int + + // save BGP state at the start of reconciliation loop run to handle it consistently throughout the run + isNetworkAdvertisedToDefaultVRF bool + isNetworkAdvertised bool } func NewUserDefinedNetworkGateway(netInfo util.NetInfo, node *corev1.Node, nodeLister listers.NodeLister, @@ -225,18 +229,18 @@ func (udng *UserDefinedNetworkGateway) AddNetwork() error { return fmt.Errorf("could not add VRF %s routes for network %s, err: %v", vrfDeviceName, udng.GetNetworkName(), err) } - isNetworkAdvertised := util.IsPodNetworkAdvertisedAtNode(udng.NetInfo, udng.node.Name) + udng.updateAdvertisementStatus() // create the iprules for this network - if err = udng.updateUDNVRFIPRules(isNetworkAdvertised); err != nil { + if err = udng.updateUDNVRFIPRules(); err != nil { return fmt.Errorf("failed to update IP rules for network %s: %w", udng.GetNetworkName(), err) } - if err = udng.updateAdvertisedUDNIsolationRules(isNetworkAdvertised); err != nil { + if err = udng.updateAdvertisedUDNIsolationRules(); err != nil { return fmt.Errorf("failed to update isolation rules for network %s: %w", udng.GetNetworkName(), err) } - if err := udng.updateUDNVRFIPRoute(isNetworkAdvertised); err != nil { + if err := udng.updateUDNVRFIPRoute(); err != nil { return fmt.Errorf("failed to update ip routes for network %s: %w", udng.GetNetworkName(), err) } @@ -314,18 +318,16 @@ func (udng *UserDefinedNetworkGateway) DelNetwork() error { } } - if util.IsPodNetworkAdvertisedAtNode(udng.NetInfo, udng.node.Name) { - err := udng.updateAdvertisedUDNIsolationRules(false) - if err != nil { - return fmt.Errorf("failed to remove advertised UDN isolation rules for network %s: %w", udng.GetNetworkName(), err) - } + err := udng.deleteAdvertisedUDNIsolationRules() + if err != nil { + return fmt.Errorf("failed to remove advertised UDN isolation rules for network %s: %w", udng.GetNetworkName(), err) } if err := udng.delMarkChain(); err != nil { return err } 
// delete the management port interface for this network - err := udng.deleteUDNManagementPort() + err = udng.deleteUDNManagementPort() if err != nil { return err } @@ -483,8 +485,7 @@ func (udng *UserDefinedNetworkGateway) computeRoutesForUDN(mpLink netlink.Link) // Route2: Add default route: default via 172.18.0.1 dev breth0 mtu 1400 // necessary for UDN CNI and host-networked pods default traffic to go to node's gatewayIP - isNetworkAdvertised := util.IsPodNetworkAdvertisedAtNode(udng.NetInfo, udng.node.Name) - defaultRoute, err := udng.getDefaultRoute(isNetworkAdvertised) + defaultRoute, err := udng.getDefaultRouteExceptIfVRFLite() if err != nil { return nil, fmt.Errorf("unable to add default route for network %s, err: %v", udng.GetNetworkName(), err) } @@ -585,15 +586,7 @@ func (udng *UserDefinedNetworkGateway) computeRoutesForUDN(mpLink netlink.Link) return retVal, nil } -func (udng *UserDefinedNetworkGateway) getDefaultRoute(isNetworkAdvertised bool) ([]netlink.Route, error) { - vrfs := udng.GetPodNetworkAdvertisedOnNodeVRFs(udng.node.Name) - // If the network is advertised on a non default VRF then we should only consider routes received from external BGP - // device and not send any traffic based on default route similar to one present in default VRF. This is more important - // for VRF-Lite usecase where we need traffic to leave from vlan device instead of default gateway interface. - if isNetworkAdvertised && !slices.Contains(vrfs, types.DefaultNetworkName) { - return nil, nil - } - +func (udng *UserDefinedNetworkGateway) getDefaultRoute() ([]netlink.Route, error) { networkMTU := udng.NetInfo.MTU() if networkMTU == 0 { networkMTU = config.Default.MTU @@ -618,6 +611,16 @@ func (udng *UserDefinedNetworkGateway) getDefaultRoute(isNetworkAdvertised bool) return retVal, nil } +func (udng *UserDefinedNetworkGateway) getDefaultRouteExceptIfVRFLite() ([]netlink.Route, error) { + // If the network is advertised on a non default VRF then we should only consider routes received from external BGP + // device and not send any traffic based on default route similar to one present in default VRF. This is more important + // for VRF-Lite usecase where we need traffic to leave from vlan device instead of default gateway interface. + if udng.isNetworkAdvertised && !udng.isNetworkAdvertisedToDefaultVRF { + return nil, nil + } + return udng.getDefaultRoute() +} + // getV4MasqueradeIP returns the V4 management port masqueradeIP for this network func (udng *UserDefinedNetworkGateway) getV4MasqueradeIP() (*net.IPNet, error) { if !config.IPv4Mode { @@ -644,18 +647,18 @@ func (udng *UserDefinedNetworkGateway) getV6MasqueradeIP() (*net.IPNet, error) { // constructUDNVRFIPRules constructs rules that redirect matching packets // into the corresponding UDN VRF routing table. 
-// If the network is not advertised, an example of the rules we set for a -// network is: -// 2000: from all fwmark 0x1001 lookup 1007 -// 2000: from all to 169.254.0.12 lookup 1007 -// 2000: from all fwmark 0x1002 lookup 1009 -// 2000: from all to 169.254.0.14 lookup 1009 -// If the network is advertised, an example of the rules we set for a network is: +// +// When a network is not advertised on the default VRF, an example of the rules +// we set for it is: +// 2000: from all fwmark 0x1001 lookup 1007 +// 2000: from all to 169.254.0.12 lookup 1007 +// +// When a network is advertised on the default VRF, an example of the rules +// we set for it is: // 2000: from all fwmark 0x1001 lookup 1007 // 2000: from all to 10.132.0.0/14 lookup 1007 -// 2000: from all fwmark 0x1001 lookup 1009 -// 2000: from all to 10.134.0.0/14 lookup 1009 -func (udng *UserDefinedNetworkGateway) constructUDNVRFIPRules(isNetworkAdvertised bool) ([]netlink.Rule, []netlink.Rule, error) { +// 2000: from all to 169.254.0.12 lookup 1007 +func (udng *UserDefinedNetworkGateway) constructUDNVRFIPRules() ([]netlink.Rule, []netlink.Rule, error) { var addIPRules []netlink.Rule var delIPRules []netlink.Rule var masqIPRules []netlink.Rule @@ -688,12 +691,14 @@ func (udng *UserDefinedNetworkGateway) constructUDNVRFIPRules(isNetworkAdvertise } } switch { - case !isNetworkAdvertised: + case udng.isNetworkAdvertisedToDefaultVRF: + // the network is advertised to the default VRF addIPRules = append(addIPRules, masqIPRules...) - delIPRules = append(delIPRules, subnetIPRules...) - default: addIPRules = append(addIPRules, subnetIPRules...) - delIPRules = append(delIPRules, masqIPRules...) + default: + // network is not advertised on the default VRF + addIPRules = append(addIPRules, masqIPRules...) + delIPRules = append(delIPRules, subnetIPRules...) 
} return addIPRules, delIPRules, nil } @@ -791,19 +796,20 @@ func (udng *UserDefinedNetworkGateway) doReconcile() error { return fmt.Errorf("openflow manager with default bridge configuration has not been provided for network %s", udng.GetNetworkName()) } + udng.updateAdvertisementStatus() + // update bridge configuration - isNetworkAdvertised := util.IsPodNetworkAdvertisedAtNode(udng.NetInfo, udng.node.Name) netConfig := udng.openflowManager.defaultBridge.GetNetworkConfig(udng.GetNetworkName()) if netConfig == nil { return fmt.Errorf("missing bridge configuration for network %s", udng.GetNetworkName()) } - netConfig.Advertised.Store(isNetworkAdvertised) + netConfig.Advertised.Store(udng.isNetworkAdvertised) - if err := udng.updateUDNVRFIPRules(isNetworkAdvertised); err != nil { + if err := udng.updateUDNVRFIPRules(); err != nil { return fmt.Errorf("error while updating ip rule for UDN %s: %s", udng.GetNetworkName(), err) } - if err := udng.updateUDNVRFIPRoute(isNetworkAdvertised); err != nil { + if err := udng.updateUDNVRFIPRoute(); err != nil { return fmt.Errorf("error while updating ip route for UDN %s: %s", udng.GetNetworkName(), err) } @@ -817,16 +823,16 @@ func (udng *UserDefinedNetworkGateway) doReconcile() error { // let's sync these flows immediately udng.openflowManager.requestFlowSync() - if err := udng.updateAdvertisedUDNIsolationRules(isNetworkAdvertised); err != nil { + if err := udng.updateAdvertisedUDNIsolationRules(); err != nil { return fmt.Errorf("error while updating advertised UDN isolation rules for network %s: %w", udng.GetNetworkName(), err) } return nil } // updateUDNVRFIPRules updates IP rules for a network depending on whether the -// network is advertised or not -func (udng *UserDefinedNetworkGateway) updateUDNVRFIPRules(isNetworkAdvertised bool) error { - addIPRules, deleteIPRules, err := udng.constructUDNVRFIPRules(isNetworkAdvertised) +// network is advertised to the default VRF or not +func (udng *UserDefinedNetworkGateway) updateUDNVRFIPRules() error { + addIPRules, deleteIPRules, err := udng.constructUDNVRFIPRules() if err != nil { return fmt.Errorf("unable to get iprules for network %s, err: %v", udng.GetNetworkName(), err) } @@ -845,30 +851,40 @@ func (udng *UserDefinedNetworkGateway) updateUDNVRFIPRules(isNetworkAdvertised b } // Add or remove default route from a vrf device based on the network is -// advertised on its own network or default network -func (udng *UserDefinedNetworkGateway) updateUDNVRFIPRoute(isNetworkAdvertised bool) error { - vrfs := udng.GetPodNetworkAdvertisedOnNodeVRFs(udng.node.Name) - if isNetworkAdvertised && !slices.Contains(vrfs, types.DefaultNetworkName) { +// advertised on its own network or the default network +func (udng *UserDefinedNetworkGateway) updateUDNVRFIPRoute() error { + vrfName := util.GetNetworkVRFName(udng.NetInfo) + + switch { + case udng.isNetworkAdvertised && !udng.isNetworkAdvertisedToDefaultVRF: + // Remove default route for networks advertised to non-default VRF if err := udng.removeDefaultRouteFromVRF(); err != nil { - return fmt.Errorf("error while removing default route from VRF %s corresponding to network %s: %s", - util.GetNetworkVRFName(udng.NetInfo), udng.GetNetworkName(), err) + return fmt.Errorf("failed to remove default route from VRF %s for network %s: %v", + vrfName, udng.GetNetworkName(), err) } - } else if !isNetworkAdvertised || slices.Contains(vrfs, types.DefaultNetworkName) { - defaultRoute, err := udng.getDefaultRoute(isNetworkAdvertised) + + default: + // Add default route for networks 
that are either: + // - not advertised + // - advertised to default VRF + defaultRoute, err := udng.getDefaultRouteExceptIfVRFLite() if err != nil { - return fmt.Errorf("unable to get default route for network %s, err: %v", udng.GetNetworkName(), err) + return fmt.Errorf("failed to get default route for network %s: %v", + udng.GetNetworkName(), err) } - if err = udng.vrfManager.AddVRFRoutes(util.GetNetworkVRFName(udng.NetInfo), defaultRoute); err != nil { - return fmt.Errorf("error while adding default route to VRF %s corresponding to network %s, err: %v", - util.GetNetworkVRFName(udng.NetInfo), udng.GetNetworkName(), err) + + if err = udng.vrfManager.AddVRFRoutes(vrfName, defaultRoute); err != nil { + return fmt.Errorf("failed to add default route to VRF %s for network %s: %v", + vrfName, udng.GetNetworkName(), err) } } + return nil } func (udng *UserDefinedNetworkGateway) removeDefaultRouteFromVRF() error { vrfDeviceName := util.GetNetworkVRFName(udng.NetInfo) - defaultRoute, err := udng.getDefaultRoute(false) + defaultRoute, err := udng.getDefaultRoute() if err != nil { return fmt.Errorf("unable to get default route for network %s, err: %v", udng.GetNetworkName(), err) } @@ -897,39 +913,22 @@ func (udng *UserDefinedNetworkGateway) removeDefaultRouteFromVRF() error { // comment "advertised UDNs V4 subnets" // elements = { 10.10.0.0/16 comment "cluster_udn_l3network" } // } -func (udng *UserDefinedNetworkGateway) updateAdvertisedUDNIsolationRules(isNetworkAdvertised bool) error { +func (udng *UserDefinedNetworkGateway) updateAdvertisedUDNIsolationRules() error { + switch { + case udng.isNetworkAdvertised: + return udng.addAdvertisedUDNIsolationRules() + default: + return udng.deleteAdvertisedUDNIsolationRules() + } +} + +func (udng *UserDefinedNetworkGateway) addAdvertisedUDNIsolationRules() error { nft, err := nodenft.GetNFTablesHelper() if err != nil { return fmt.Errorf("failed to get nftables helper: %v", err) } tx := nft.NewTransaction() - if !isNetworkAdvertised { - existingV4, err := nft.ListElements(context.TODO(), "set", nftablesAdvertisedUDNsSetV4) - if err != nil { - if !knftables.IsNotFound(err) { - return fmt.Errorf("could not list existing items in %s set: %w", nftablesAdvertisedUDNsSetV4, err) - } - } - existingV6, err := nft.ListElements(context.TODO(), "set", nftablesAdvertisedUDNsSetV6) - if err != nil { - if !knftables.IsNotFound(err) { - return fmt.Errorf("could not list existing items in %s set: %w", nftablesAdvertisedUDNsSetV6, err) - } - } - - for _, elem := range append(existingV4, existingV6...) 
{ - if elem.Comment != nil && *elem.Comment == udng.GetNetworkName() { - tx.Delete(elem) - } - } - - if tx.NumOperations() == 0 { - return nil - } - return nft.Run(context.TODO(), tx) - } - for _, udnNet := range udng.Subnets() { set := nftablesAdvertisedUDNsSetV4 if utilnet.IsIPv6CIDR(udnNet.CIDR) { @@ -948,3 +947,41 @@ func (udng *UserDefinedNetworkGateway) updateAdvertisedUDNIsolationRules(isNetwo } return nft.Run(context.TODO(), tx) } + +func (udng *UserDefinedNetworkGateway) deleteAdvertisedUDNIsolationRules() error { + nft, err := nodenft.GetNFTablesHelper() + if err != nil { + return fmt.Errorf("failed to get nftables helper: %v", err) + } + tx := nft.NewTransaction() + + existingV4, err := nft.ListElements(context.TODO(), "set", nftablesAdvertisedUDNsSetV4) + if err != nil { + if !knftables.IsNotFound(err) { + return fmt.Errorf("could not list existing items in %s set: %w", nftablesAdvertisedUDNsSetV4, err) + } + } + existingV6, err := nft.ListElements(context.TODO(), "set", nftablesAdvertisedUDNsSetV6) + if err != nil { + if !knftables.IsNotFound(err) { + return fmt.Errorf("could not list existing items in %s set: %w", nftablesAdvertisedUDNsSetV6, err) + } + } + + for _, elem := range append(existingV4, existingV6...) { + if elem.Comment != nil && *elem.Comment == udng.GetNetworkName() { + tx.Delete(elem) + } + } + + if tx.NumOperations() == 0 { + return nil + } + return nft.Run(context.TODO(), tx) +} + +func (udng *UserDefinedNetworkGateway) updateAdvertisementStatus() { + vrfs := udng.GetPodNetworkAdvertisedOnNodeVRFs(udng.node.Name) + udng.isNetworkAdvertised = len(vrfs) > 0 + udng.isNetworkAdvertisedToDefaultVRF = slices.Contains(vrfs, types.DefaultNetworkName) +} diff --git a/go-controller/pkg/node/gateway_udn_test.go b/go-controller/pkg/node/gateway_udn_test.go index 34848faf7e..7a72343e7d 100644 --- a/go-controller/pkg/node/gateway_udn_test.go +++ b/go-controller/pkg/node/gateway_udn_test.go @@ -1143,7 +1143,7 @@ var _ = Describe("UserDefinedNetworkGateway", func() { Expect(udnGateway.AddNetwork()).To(Succeed()) flowMap = udnGateway.gateway.openflowManager.flowCache - Expect(flowMap["DEFAULT"]).To(HaveLen(69)) // 18 UDN Flows and 5 advertisedUDN flows are added by default + Expect(flowMap["DEFAULT"]).To(HaveLen(71)) // 18 UDN Flows, 5 advertisedUDN flows, and 2 packet mark flows (IPv4+IPv6) are added by default Expect(udnGateway.openflowManager.defaultBridge.GetNetConfigLen()).To(Equal(2)) // default network + UDN network defaultUdnConfig := udnGateway.openflowManager.defaultBridge.GetNetworkConfig("default") bridgeUdnConfig := udnGateway.openflowManager.defaultBridge.GetNetworkConfig("bluenet") @@ -1166,7 +1166,9 @@ var _ = Describe("UserDefinedNetworkGateway", func() { // Check flows for default network service CIDR. bridgeconfig.CheckDefaultSvcIsolationOVSFlows(flowMap["DEFAULT"], defaultUdnConfig, ofPortHost, bridgeMAC, svcCIDR) - // Expect exactly one flow per advertised UDN for table 2 and table 0 for service isolation. + // Expect exactly two flow per advertised UDN for table 2 and table 0 for service isolation. + // but one of the flows used by advertised UDNs is already tracked and used by default UDNs hence not + // counted here but in the check above for default svc isolation flows. 
bridgeconfig.CheckAdvertisedUDNSvcIsolationOVSFlows(flowMap["DEFAULT"], bridgeUdnConfig, "bluenet", svcCIDR, 2) } @@ -1625,7 +1627,6 @@ func TestConstructUDNVRFIPRules(t *testing.T) { cidr := "" if config.IPv4Mode { cidr = "100.128.0.0/16/24" - } if config.IPv4Mode && config.IPv6Mode { cidr += ",ae70::/60/64" @@ -1655,7 +1656,7 @@ func TestConstructUDNVRFIPRules(t *testing.T) { }) g.Expect(err).NotTo(HaveOccurred()) udnGateway.vrfTableId = test.vrftableID - rules, delRules, err := udnGateway.constructUDNVRFIPRules(false) + rules, delRules, err := udnGateway.constructUDNVRFIPRules() g.Expect(err).ToNot(HaveOccurred()) for i, rule := range rules { g.Expect(rule.Priority).To(Equal(test.expectedRules[i].priority)) @@ -1677,7 +1678,7 @@ func TestConstructUDNVRFIPRules(t *testing.T) { } } -func TestConstructUDNVRFIPRulesPodNetworkAdvertised(t *testing.T) { +func TestConstructUDNVRFIPRulesPodNetworkAdvertisedToTheDefaultNetwork(t *testing.T) { type testRule struct { priority int family int @@ -1711,8 +1712,6 @@ func TestConstructUDNVRFIPRulesPodNetworkAdvertised(t *testing.T) { table: 1007, dst: *ovntest.MustParseIPNet("100.128.0.0/16"), }, - }, - deleteRules: []testRule{ { priority: UDNMasqueradeIPRulePriority, family: netlink.FAMILY_V4, @@ -1738,8 +1737,6 @@ func TestConstructUDNVRFIPRulesPodNetworkAdvertised(t *testing.T) { table: 1009, dst: *ovntest.MustParseIPNet("ae70::/60"), }, - }, - deleteRules: []testRule{ { priority: UDNMasqueradeIPRulePriority, family: netlink.FAMILY_V6, @@ -1777,6 +1774,181 @@ func TestConstructUDNVRFIPRulesPodNetworkAdvertised(t *testing.T) { table: 1010, dst: *ovntest.MustParseIPNet("ae70::/60"), }, + { + priority: UDNMasqueradeIPRulePriority, + family: netlink.FAMILY_V4, + table: 1010, + dst: *util.GetIPNetFullMaskFromIP(ovntest.MustParseIP("169.254.0.16")), + }, + { + priority: UDNMasqueradeIPRulePriority, + family: netlink.FAMILY_V6, + table: 1010, + dst: *util.GetIPNetFullMaskFromIP(ovntest.MustParseIP("fd69::10")), + }, + }, + v4mode: true, + v6mode: true, + }, + } + config.Gateway.V6MasqueradeSubnet = "fd69::/112" + config.Gateway.V4MasqueradeSubnet = "169.254.0.0/16" + for _, test := range tests { + t.Run(test.desc, func(t *testing.T) { + g := NewWithT(t) + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: nodeName, + }, + } + config.IPv4Mode = test.v4mode + config.IPv6Mode = test.v6mode + cidr := "" + if config.IPv4Mode { + cidr = "100.128.0.0/16/24" + } + if config.IPv4Mode && config.IPv6Mode { + cidr += ",ae70::/60/64" + } else if config.IPv6Mode { + cidr = "ae70::/60/64" + } + nad := ovntest.GenerateNAD("bluenet", "rednad", "greenamespace", + types.Layer3Topology, cidr, types.NetworkRolePrimary) + ovntest.AnnotateNADWithNetworkID("3", nad) + netInfo, err := util.ParseNADInfo(nad) + g.Expect(err).ToNot(HaveOccurred()) + mutableNetInfo := util.NewMutableNetInfo(netInfo) + mutableNetInfo.SetPodNetworkAdvertisedVRFs(map[string][]string{node.Name: {"bluenet"}}) + ofm := getDummyOpenflowManager() + // create dummy gateway interface(Need to run this test as root) + err = netlink.LinkAdd(&netlink.Dummy{ + LinkAttrs: netlink.LinkAttrs{ + Name: "breth0", + }, + }) + g.Expect(err).NotTo(HaveOccurred()) + udnGateway, err := NewUserDefinedNetworkGateway(mutableNetInfo, node, nil, nil, nil, nil, &gateway{openflowManager: ofm}) + g.Expect(err).NotTo(HaveOccurred()) + // delete dummy gateway interface after creating UDN gateway(Need to run this test as root) + err = netlink.LinkDel(&netlink.Dummy{ + LinkAttrs: netlink.LinkAttrs{ + Name: "breth0", + }, + }) 
+ g.Expect(err).NotTo(HaveOccurred()) + udnGateway.vrfTableId = test.vrftableID + udnGateway.isNetworkAdvertised = true + udnGateway.isNetworkAdvertisedToDefaultVRF = true + rules, delRules, err := udnGateway.constructUDNVRFIPRules() + g.Expect(err).ToNot(HaveOccurred()) + for i, rule := range rules { + g.Expect(rule.Priority).To(Equal(test.expectedRules[i].priority)) + g.Expect(rule.Table).To(Equal(test.expectedRules[i].table)) + g.Expect(rule.Family).To(Equal(test.expectedRules[i].family)) + if rule.Dst != nil { + g.Expect(*rule.Dst).To(Equal(test.expectedRules[i].dst)) + } else { + g.Expect(rule.Mark).To(Equal(test.expectedRules[i].mark)) + } + } + for i, rule := range delRules { + g.Expect(rule.Priority).To(Equal(test.deleteRules[i].priority)) + g.Expect(rule.Table).To(Equal(test.deleteRules[i].table)) + g.Expect(rule.Family).To(Equal(test.deleteRules[i].family)) + g.Expect(*rule.Dst).To(Equal(test.deleteRules[i].dst)) + } + }) + } +} + +func TestConstructUDNVRFIPRulesPodNetworkAdvertisedToNoneDefaultNetwork(t *testing.T) { + type testRule struct { + priority int + family int + table int + mark uint32 + dst net.IPNet + } + type testConfig struct { + desc string + vrftableID int + v4mode bool + v6mode bool + expectedRules []testRule + deleteRules []testRule + } + + tests := []testConfig{ + { + desc: "v4 rule test", + vrftableID: 1007, + expectedRules: []testRule{ + { + priority: UDNMasqueradeIPRulePriority, + family: netlink.FAMILY_V4, + table: 1007, + mark: 0x1003, + }, + }, + deleteRules: []testRule{ + { + priority: UDNMasqueradeIPRulePriority, + family: netlink.FAMILY_V4, + table: 1007, + dst: *util.GetIPNetFullMaskFromIP(ovntest.MustParseIP("169.254.0.16")), + }, + { + priority: UDNMasqueradeIPRulePriority, + family: netlink.FAMILY_V4, + table: 1007, + dst: *ovntest.MustParseIPNet("100.128.0.0/16"), + }, + }, + v4mode: true, + }, + { + desc: "v6 rule test", + vrftableID: 1009, + expectedRules: []testRule{ + { + priority: UDNMasqueradeIPRulePriority, + family: netlink.FAMILY_V6, + table: 1009, + mark: 0x1003, + }, + }, + deleteRules: []testRule{ + { + priority: UDNMasqueradeIPRulePriority, + family: netlink.FAMILY_V6, + table: 1009, + dst: *util.GetIPNetFullMaskFromIP(ovntest.MustParseIP("fd69::10")), + }, + { + priority: UDNMasqueradeIPRulePriority, + family: netlink.FAMILY_V6, + table: 1009, + dst: *ovntest.MustParseIPNet("ae70::/60"), + }, + }, + v6mode: true, + }, + { + desc: "dualstack rule test", + vrftableID: 1010, + expectedRules: []testRule{ + { + priority: UDNMasqueradeIPRulePriority, + family: netlink.FAMILY_V4, + table: 1010, + mark: 0x1003, + }, + { + priority: UDNMasqueradeIPRulePriority, + family: netlink.FAMILY_V6, + table: 1010, + mark: 0x1003, + }, }, deleteRules: []testRule{ { @@ -1791,6 +1963,18 @@ func TestConstructUDNVRFIPRulesPodNetworkAdvertised(t *testing.T) { table: 1010, dst: *util.GetIPNetFullMaskFromIP(ovntest.MustParseIP("fd69::10")), }, + { + priority: UDNMasqueradeIPRulePriority, + family: netlink.FAMILY_V4, + table: 1010, + dst: *ovntest.MustParseIPNet("100.128.0.0/16"), + }, + { + priority: UDNMasqueradeIPRulePriority, + family: netlink.FAMILY_V6, + table: 1010, + dst: *ovntest.MustParseIPNet("ae70::/60"), + }, }, v4mode: true, v6mode: true, @@ -1842,8 +2026,12 @@ func TestConstructUDNVRFIPRulesPodNetworkAdvertised(t *testing.T) { }) g.Expect(err).NotTo(HaveOccurred()) udnGateway.vrfTableId = test.vrftableID - rules, delRules, err := udnGateway.constructUDNVRFIPRules(true) + udnGateway.isNetworkAdvertised = true + 
udnGateway.isNetworkAdvertisedToDefaultVRF = false + rules, delRules, err := udnGateway.constructUDNVRFIPRules() g.Expect(err).ToNot(HaveOccurred()) + g.Expect(rules).To(HaveLen(len(test.expectedRules))) + g.Expect(delRules).To(HaveLen(len(test.deleteRules))) for i, rule := range rules { g.Expect(rule.Priority).To(Equal(test.expectedRules[i].priority)) g.Expect(rule.Table).To(Equal(test.expectedRules[i].table)) @@ -1973,7 +2161,8 @@ func TestUserDefinedNetworkGateway_updateAdvertisedUDNIsolationRules(t *testing. udng := &UserDefinedNetworkGateway{ NetInfo: netInfo, } - err = udng.updateAdvertisedUDNIsolationRules(tt.isNetworkAdvertised) + udng.isNetworkAdvertised = tt.isNetworkAdvertised + err = udng.updateAdvertisedUDNIsolationRules() g.Expect(err).NotTo(HaveOccurred()) v4Elems, err := nft.ListElements(context.TODO(), "set", nftablesAdvertisedUDNsSetV4) diff --git a/go-controller/pkg/node/node_nftables.go b/go-controller/pkg/node/node_nftables.go index e52a8970a4..ca4afc9ac2 100644 --- a/go-controller/pkg/node/node_nftables.go +++ b/go-controller/pkg/node/node_nftables.go @@ -13,8 +13,8 @@ import ( const nftPMTUDChain = "no-pmtud" -// setupPMTUDNFTSets sets up the NFT sets that contain remote Kubernetes node IPs -func setupPMTUDNFTSets() error { +// setupRemoteNodeNFTSets sets up the NFT sets that contain remote Kubernetes node IPs +func setupRemoteNodeNFTSets() error { nft, err := nodenft.GetNFTablesHelper() if err != nil { return fmt.Errorf("failed to get nftables helper: %w", err) @@ -22,12 +22,12 @@ func setupPMTUDNFTSets() error { tx := nft.NewTransaction() tx.Add(&knftables.Set{ - Name: types.NFTNoPMTUDRemoteNodeIPsv4, + Name: types.NFTRemoteNodeIPsv4, Comment: knftables.PtrTo("Block egress ICMP needs frag to remote Kubernetes nodes"), Type: "ipv4_addr", }) tx.Add(&knftables.Set{ - Name: types.NFTNoPMTUDRemoteNodeIPsv6, + Name: types.NFTRemoteNodeIPsv6, Comment: knftables.PtrTo("Block egress ICMPv6 packet too big to remote Kubernetes nodes"), Type: "ipv6_addr", }) @@ -68,7 +68,7 @@ func setupPMTUDNFTChain() error { tx.Add(&knftables.Rule{ Chain: nftPMTUDChain, Rule: knftables.Concat( - "ip daddr @"+types.NFTNoPMTUDRemoteNodeIPsv4, + "ip daddr @"+types.NFTRemoteNodeIPsv4, "meta l4proto icmp", "icmp type 3", // type 3 == Destination Unreachable "icmp code 4", // code 4 indicates fragmentation needed @@ -85,7 +85,7 @@ func setupPMTUDNFTChain() error { "meta l4proto icmpv6", // match on ICMPv6 packets "icmpv6 type 2", // type 2 == Packet Too Big (PMTUD) "icmpv6 code 0", // code 0 for that message - "ip6 daddr @"+types.NFTNoPMTUDRemoteNodeIPsv6, + "ip6 daddr @"+types.NFTRemoteNodeIPsv6, counterIfDebug, "drop", // drop the packet ), diff --git a/go-controller/pkg/node/obj_retry_node.go b/go-controller/pkg/node/obj_retry_node.go index 9c9657678e..646cca2ac3 100644 --- a/go-controller/pkg/node/obj_retry_node.go +++ b/go-controller/pkg/node/obj_retry_node.go @@ -238,34 +238,43 @@ func (h *nodeEventHandler) UpdateResource(oldObj, newObj interface{}, _ bool) er return nil } - // remote node that is changing - ipsToKeep := map[string]bool{} - for _, address := range newNode.Status.Addresses { - if address.Type != corev1.NodeInternalIP { - continue + if util.NodeHostCIDRsAnnotationChanged(oldNode, newNode) { + // remote node that is changing + // Use GetNodeAddresses to get new node IPs + newIPsv4, newIPsv6, err := util.GetNodeAddresses(config.IPv4Mode, config.IPv6Mode, newNode) + if err != nil { + return fmt.Errorf("failed to get addresses for new node %q: %w", newNode.Name, err) } - nodeIP := 
net.ParseIP(address.Address) - if nodeIP == nil { - continue + + ipsToKeep := map[string]bool{} + for _, nodeIP := range newIPsv4 { + ipsToKeep[nodeIP.String()] = true } - ipsToKeep[nodeIP.String()] = true - } - ipsToRemove := make([]net.IP, 0) - for _, address := range oldNode.Status.Addresses { - if address.Type != corev1.NodeInternalIP { - continue + for _, nodeIP := range newIPsv6 { + ipsToKeep[nodeIP.String()] = true } - nodeIP := net.ParseIP(address.Address) - if nodeIP == nil { - continue + + // Use GetNodeAddresses to get old node IPs + oldIPsv4, oldIPsv6, err := util.GetNodeAddresses(config.IPv4Mode, config.IPv6Mode, oldNode) + if err != nil { + return fmt.Errorf("failed to get addresses for old node %q: %w", oldNode.Name, err) } - if _, exists := ipsToKeep[nodeIP.String()]; !exists { - ipsToRemove = append(ipsToRemove, nodeIP) + + ipsToRemove := make([]net.IP, 0) + for _, nodeIP := range oldIPsv4 { + if _, exists := ipsToKeep[nodeIP.String()]; !exists { + ipsToRemove = append(ipsToRemove, nodeIP) + } + } + for _, nodeIP := range oldIPsv6 { + if _, exists := ipsToKeep[nodeIP.String()]; !exists { + ipsToRemove = append(ipsToRemove, nodeIP) + } } - } - if err := removePMTUDNodeNFTRules(ipsToRemove); err != nil { - return fmt.Errorf("error removing node %q stale NFT rules during update: %w", oldNode.Name, err) + if err := removePMTUDNodeNFTRules(ipsToRemove); err != nil { + return fmt.Errorf("error removing node %q stale NFT rules during update: %w", oldNode.Name, err) + } } return h.nc.addOrUpdateNode(newNode) diff --git a/go-controller/pkg/ovn/base_network_controller_secondary.go b/go-controller/pkg/ovn/base_network_controller_secondary.go index f9c6d0b18f..4e4415f78c 100644 --- a/go-controller/pkg/ovn/base_network_controller_secondary.go +++ b/go-controller/pkg/ovn/base_network_controller_secondary.go @@ -28,6 +28,8 @@ import ( libovsdbops "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/ops" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" + addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/udnenabledsvc" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/persistentips" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -812,7 +814,7 @@ func (oc *BaseSecondaryNetworkController) allowPersistentIPs() bool { // buildUDNEgressSNAT is used to build the conditional SNAT required on L3 and L2 UDNs to // steer traffic correctly via mp0 when leaving OVN to the host -func (bsnc *BaseSecondaryNetworkController) buildUDNEgressSNAT(localPodSubnets []*net.IPNet, outputPort string) ([]*nbdb.NAT, error) { +func (bsnc *BaseSecondaryNetworkController) buildUDNEgressSNAT(localPodSubnets []*net.IPNet, outputPort string, isUDNAdvertised bool) ([]*nbdb.NAT, error) { if len(localPodSubnets) == 0 { return nil, nil // nothing to do } @@ -822,16 +824,19 @@ func (bsnc *BaseSecondaryNetworkController) buildUDNEgressSNAT(localPodSubnets [ networkID := bsnc.GetNetworkID() // calculate MAC dstMac := util.IPAddrToHWAddr(util.GetNodeManagementIfAddr(localPodSubnets[0]).IP) + dstMacMatch := getMasqueradeManagementIPSNATMatch(dstMac.String()) extIDs := map[string]string{ types.NetworkExternalID: bsnc.GetNetworkName(), types.TopologyExternalID: bsnc.TopologyType(), } for _, localPodSubnet := range localPodSubnets { + snatMatch := dstMacMatch + ipFamily := 
utilnet.IPv4 + masqIP, err = udn.AllocateV4MasqueradeIPs(networkID) if utilnet.IsIPv6CIDR(localPodSubnet) { masqIP, err = udn.AllocateV6MasqueradeIPs(networkID) - } else { - masqIP, err = udn.AllocateV4MasqueradeIPs(networkID) + ipFamily = utilnet.IPv6 } if err != nil { return nil, err @@ -839,12 +844,76 @@ func (bsnc *BaseSecondaryNetworkController) buildUDNEgressSNAT(localPodSubnets [ if masqIP == nil { return nil, fmt.Errorf("masquerade IP cannot be empty network %s (%d): %v", bsnc.GetNetworkName(), networkID, err) } - snats = append(snats, libovsdbops.BuildSNATWithMatch(&masqIP.ManagementPort.IP, localPodSubnet, outputPort, - extIDs, getMasqueradeManagementIPSNATMatch(dstMac.String()))) + + if isUDNAdvertised { + // For advertised networks, we need to SNAT any traffic leaving the + // pods from these networks towards the node IPs in the cluster. In + // order to do such a conditional SNAT, we need an address set that + // contains the node IPs in the cluster. Given that egressIP feature + // already has an address set containing these nodeIPs owned by the + // default network controller, let's re-use it. + nodeIPsASIDs := getEgressIPAddrSetDbIDs(NodeIPAddrSetName, types.DefaultNetworkName, DefaultNetworkControllerName) + nodeIPsAS, err := bsnc.addressSetFactory.GetAddressSet(nodeIPsASIDs) + if err != nil { + return nil, fmt.Errorf("failed to get address set with IDs %v: %w", nodeIPsASIDs, err) + } + + // We also need to SNAT any traffic leaving the pods from these + // networks towards the default network service cluster IPs + // accessible from UDNs: we want the reply traffic to hit the + // masquerade IP rule rather than the UDN subnet ip rule to allow + // for overlaps in VRF-Lite configurations + svcIPsASIDs := udnenabledsvc.GetAddressSetDBIDs() + svcIPsAS, err := bsnc.addressSetFactory.GetAddressSet(svcIPsASIDs) + if err != nil { + return nil, fmt.Errorf("failed to get address set with IDs %v: %w", svcIPsASIDs, err) + } + + additionalSNATMatch := getClusterNodesDestinationBasedSNATMatch(ipFamily, nodeIPsAS, svcIPsAS) + if additionalSNATMatch != "" { + snatMatch = fmt.Sprintf("%s && %s", snatMatch, additionalSNATMatch) + } + } + + snat := libovsdbops.BuildSNATWithMatch( + &masqIP.ManagementPort.IP, + localPodSubnet, + outputPort, + extIDs, + snatMatch, + ) + snats = append(snats, snat) } + return snats, nil } +func getMasqueradeManagementIPSNATMatch(dstMac string) string { + return fmt.Sprintf("eth.dst == %s", dstMac) +} + +// getClusterNodesDestinationBasedSNATMatch creates destination-based SNAT match for the specified IP family +func getClusterNodesDestinationBasedSNATMatch(ipFamily utilnet.IPFamily, addressSets ...addressset.AddressSet) string { + asMatches := make([]string, 0, len(addressSets)) + for _, as := range addressSets { + asIPv4, asIPv6 := as.GetASHashNames() + switch { + case ipFamily == utilnet.IPv4 && asIPv4 != "": + asMatches = append(asMatches, fmt.Sprintf("ip4.dst == $%s", asIPv4)) + case ipFamily == utilnet.IPv6 && asIPv6 != "": + asMatches = append(asMatches, fmt.Sprintf("ip6.dst == $%s", asIPv6)) + } + } + switch len(asMatches) { + case 0: + return "" + case 1: + return asMatches[0] + default: + return fmt.Sprintf("(%s)", strings.Join(asMatches, " || ")) + } +} + func (bsnc *BaseSecondaryNetworkController) ensureDHCP(pod *corev1.Pod, podAnnotation *util.PodAnnotation, lsp *nbdb.LogicalSwitchPort) error { opts := []kubevirt.DHCPConfigsOpt{} @@ -867,10 +936,6 @@ func (bsnc *BaseSecondaryNetworkController) ensureDHCP(pod *corev1.Pod, podAnnot return 
kubevirt.EnsureDHCPOptionsForLSP(bsnc.controllerName, bsnc.nbClient, pod, podAnnotation.IPs, lsp, opts...) } -func getMasqueradeManagementIPSNATMatch(dstMac string) string { - return fmt.Sprintf("eth.dst == %s", dstMac) -} - func (bsnc *BaseSecondaryNetworkController) requireDHCP(pod *corev1.Pod) bool { // Configure DHCP only for kubevirt VMs layer2 primary udn with subnets return kubevirt.IsPodOwnedByVirtualMachine(pod) && diff --git a/go-controller/pkg/ovn/egressgw.go b/go-controller/pkg/ovn/egressgw.go index b607a3b253..d9d8610aba 100644 --- a/go-controller/pkg/ovn/egressgw.go +++ b/go-controller/pkg/ovn/egressgw.go @@ -589,7 +589,7 @@ func (oc *DefaultNetworkController) deletePodSNAT(nodeName string, extIPs, podIP return nil } // Default network does not set any matches in Pod SNAT - ops, err := deletePodSNATOps(oc.nbClient, nil, oc.GetNetworkScopedGWRouterName(nodeName), extIPs, podIPNets, "") + ops, err := deletePodSNATOps(oc.nbClient, nil, oc.GetNetworkScopedGWRouterName(nodeName), extIPs, podIPNets) if err != nil { return err } @@ -639,8 +639,8 @@ func getExternalIPsGR(watchFactory *factory.WatchFactory, nodeName string) ([]*n // deletePodSNATOps creates ovsdb operation that removes per pod SNAT rules towards the nodeIP that are applied to the GR where the pod resides // used when disableSNATMultipleGWs=true -func deletePodSNATOps(nbClient libovsdbclient.Client, ops []ovsdb.Operation, gwRouterName string, extIPs, podIPNets []*net.IPNet, match string) ([]ovsdb.Operation, error) { - nats, err := buildPodSNAT(extIPs, podIPNets, match) +func deletePodSNATOps(nbClient libovsdbclient.Client, ops []ovsdb.Operation, gwRouterName string, extIPs, podIPNets []*net.IPNet) ([]ovsdb.Operation, error) { + nats, err := buildPodSNAT(extIPs, podIPNets, "") // for delete, match is not needed - we try to cleanup all the SNATs that match the isEquivalentNAT predicate if err != nil { return nil, err } @@ -657,7 +657,7 @@ func deletePodSNATOps(nbClient libovsdbclient.Client, ops []ovsdb.Operation, gwR // addOrUpdatePodSNAT adds or updates per pod SNAT rules towards the nodeIP that are applied to the GR where the pod resides // used when disableSNATMultipleGWs=true func addOrUpdatePodSNAT(nbClient libovsdbclient.Client, gwRouterName string, extIPs, podIfAddrs []*net.IPNet) error { - ops, err := addOrUpdatePodSNATOps(nbClient, gwRouterName, extIPs, podIfAddrs, nil) + ops, err := addOrUpdatePodSNATOps(nbClient, gwRouterName, extIPs, podIfAddrs, "", nil) if err != nil { return err } @@ -670,9 +670,9 @@ func addOrUpdatePodSNAT(nbClient libovsdbclient.Client, gwRouterName string, ext // addOrUpdatePodSNATOps returns the operation that adds or updates per pod SNAT rules towards the nodeIP that are // applied to the GR where the pod resides // used when disableSNATMultipleGWs=true -func addOrUpdatePodSNATOps(nbClient libovsdbclient.Client, gwRouterName string, extIPs, podIfAddrs []*net.IPNet, ops []ovsdb.Operation) ([]ovsdb.Operation, error) { +func addOrUpdatePodSNATOps(nbClient libovsdbclient.Client, gwRouterName string, extIPs, podIfAddrs []*net.IPNet, snatMatch string, ops []ovsdb.Operation) ([]ovsdb.Operation, error) { gwRouter := &nbdb.LogicalRouter{Name: gwRouterName} - nats, err := buildPodSNAT(extIPs, podIfAddrs, "") + nats, err := buildPodSNAT(extIPs, podIfAddrs, snatMatch) if err != nil { return nil, err } diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index 5f50cefb95..ed018c0de3 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go 
@@ -249,7 +249,7 @@ func NewEIPController( // CASE 3.4: Both Namespace && Pod Selectors on Spec changed // } // -// NOTE: `Spec.EgressIPs“ updates for EIP object are not processed here, that is the job of cluster manager +// NOTE: `Spec.EgressIPs" updates for EIP object are not processed here, that is the job of cluster manager // // We only care about `Spec.NamespaceSelector`, `Spec.PodSelector` and `Status` field func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (err error) { @@ -2594,9 +2594,21 @@ func (e *EgressIPController) addExternalGWPodSNATOps(ni util.NetInfo, ops []ovsd if err != nil { return nil, err } - ops, err = addOrUpdatePodSNATOps(e.nbClient, ni.GetNetworkScopedGWRouterName(pod.Spec.NodeName), extIPs, podIPs, ops) - if err != nil { - return nil, err + + // Handle each pod IP individually since each IP family needs its own SNAT match + for _, podIP := range podIPs { + ipFamily := utilnet.IPv4 + if utilnet.IsIPv6CIDR(podIP) { + ipFamily = utilnet.IPv6 + } + snatMatch, err := GetNetworkScopedClusterSubnetSNATMatch(e.nbClient, ni, pod.Spec.NodeName, util.IsPodNetworkAdvertisedAtNode(ni, pod.Spec.NodeName), ipFamily) + if err != nil { + return nil, fmt.Errorf("failed to get SNAT match for node %s for network %s: %w", pod.Spec.NodeName, ni.GetNetworkName(), err) + } + ops, err = addOrUpdatePodSNATOps(e.nbClient, ni.GetNetworkScopedGWRouterName(pod.Spec.NodeName), extIPs, []*net.IPNet{podIP}, snatMatch, ops) + if err != nil { + return nil, err + } } klog.V(5).Infof("Adding SNAT on %s since egress node managing %s/%s was the same: %s", pod.Spec.NodeName, pod.Namespace, pod.Name, status.Node) } @@ -2617,7 +2629,7 @@ func (e *EgressIPController) deleteExternalGWPodSNATOps(ni util.NetInfo, ops []o if err != nil { return nil, err } - ops, err = deletePodSNATOps(e.nbClient, ops, ni.GetNetworkScopedGWRouterName(pod.Spec.NodeName), extIPs, affectedIPs, "") + ops, err = deletePodSNATOps(e.nbClient, ops, ni.GetNetworkScopedGWRouterName(pod.Spec.NodeName), extIPs, affectedIPs) if err != nil { return nil, err } diff --git a/go-controller/pkg/ovn/gateway.go b/go-controller/pkg/ovn/gateway.go index a43adf5368..85611fe9a2 100644 --- a/go-controller/pkg/ovn/gateway.go +++ b/go-controller/pkg/ovn/gateway.go @@ -25,6 +25,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node" + addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/gateway" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/gatewayrouter" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" @@ -42,7 +43,6 @@ type GatewayManager struct { nbClient libovsdbclient.Client netInfo util.NetInfo watchFactory *factory.WatchFactory - // Cluster wide Load_Balancer_Group UUID. // Includes all node switches and node gateway routers. clusterLoadBalancerGroupUUID string @@ -143,8 +143,8 @@ func WithLoadBalancerGroups(routerLBGroup, clusterLBGroup, switchLBGroup string) } // cleanupStalePodSNATs removes pod SNATs against nodeIP for the given node if -// the SNAT.logicalIP isn't an active podIP, the pod network is being advertised -// on this node or disableSNATMultipleGWs=false. We don't have to worry about +// the SNAT.logicalIP isn't an active podIP, or disableSNATMultipleGWs=false. 
+// We don't have to worry about // missing SNATs that should be added because addLogicalPort takes care of this // for all pods when RequestRetryObjs is called for each node add. // Other non-pod SNATs like join subnet SNATs are ignored. @@ -154,11 +154,11 @@ func WithLoadBalancerGroups(routerLBGroup, clusterLBGroup, switchLBGroup string) // pod->nodeSNATs which won't get cleared up unless explicitly deleted. // NOTE2: egressIP SNATs are synced in EIP controller. func (gw *GatewayManager) cleanupStalePodSNATs(nodeName string, nodeIPs []*net.IPNet, gwLRPIPs []net.IP) error { - // collect all the pod IPs for which we should be doing the SNAT; if the pod - // network is advertised or DisableSNATMultipleGWs==false we consider all + // collect all the pod IPs for which we should be doing the SNAT; + // if DisableSNATMultipleGWs==false we consider all // the SNATs stale podIPsWithSNAT := sets.New[string]() - if !gw.isRoutingAdvertised(nodeName) && config.Gateway.DisableSNATMultipleGWs { + if config.Gateway.DisableSNATMultipleGWs { pods, err := gw.watchFactory.GetAllPods() if err != nil { return fmt.Errorf("unable to list existing pods on node: %s, %w", @@ -231,7 +231,6 @@ func (gw *GatewayManager) cleanupStalePodSNATs(nodeName string, nodeIPs []*net.I } natsToDelete = append(natsToDelete, routerNat) } - if len(natsToDelete) > 0 { err := libovsdbops.DeleteNATs(gw.nbClient, gatewayRouter, natsToDelete...) if err != nil { @@ -764,7 +763,9 @@ func (gw *GatewayManager) updateGWRouterNAT(nodeName string, clusterIPSubnet []* nats := make([]*nbdb.NAT, 0, len(clusterIPSubnet)) var nat *nbdb.NAT - if (!config.Gateway.DisableSNATMultipleGWs || gw.netInfo.IsPrimaryNetwork()) && !gw.isRoutingAdvertised(nodeName) { + // DisableSNATMultipleGWs is only applicable to cluster default network and not to user defined networks. + // For user defined networks, we always add SNAT rules regardless of whether the network is advertised or not. + if !config.Gateway.DisableSNATMultipleGWs || gw.netInfo.IsPrimaryNetwork() { // Default SNAT rules. DisableSNATMultipleGWs=false in LGW (traffic egresses via mp0) always. // We are not checking for gateway mode to be shared explicitly to reduce topology differences. for _, entry := range clusterIPSubnet { @@ -774,7 +775,17 @@ func (gw *GatewayManager) updateGWRouterNAT(nodeName string, clusterIPSubnet []* gw.gwRouterName, err) } - nat = libovsdbops.BuildSNATWithMatch(&externalIP[0], entry, "", extIDs, gw.netInfo.GetNetworkScopedClusterSubnetSNATMatch(nodeName)) + // Get the match for this specific subnet's IP family + ipFamily := utilnet.IPv4 + if utilnet.IsIPv6CIDR(entry) { + ipFamily = utilnet.IPv6 + } + snatMatch, err := GetNetworkScopedClusterSubnetSNATMatch(gw.nbClient, gw.netInfo, nodeName, gw.isRoutingAdvertised(nodeName), ipFamily) + if err != nil { + return fmt.Errorf("failed to get SNAT match for node %s for network %s: %w", nodeName, gw.netInfo.GetNetworkName(), err) + } + + nat = libovsdbops.BuildSNATWithMatch(&externalIP[0], entry, "", extIDs, snatMatch) nats = append(nats, nat) } err = libovsdbops.CreateOrUpdateNATs(gw.nbClient, gwRouter, nats...) 
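As a reference for the conditional SNAT wired in above, here is a minimal standalone sketch (not part of this patch; the helper name destMatch is illustrative, and the hash value is the one used in the unit tests below) of how the per-family destination clause is composed from hashed address-set names, mirroring getClusterNodesDestinationBasedSNATMatch:

package main

import (
	"fmt"
	"strings"
)

// destMatch mirrors the composition done by getClusterNodesDestinationBasedSNATMatch:
// every non-empty hashed address-set name of the requested family contributes an
// "ipN.dst == $<hash>" clause, and multiple clauses are OR-ed inside parentheses.
func destMatch(isIPv6 bool, hashedASNames ...string) string {
	field := "ip4.dst"
	if isIPv6 {
		field = "ip6.dst"
	}
	clauses := make([]string, 0, len(hashedASNames))
	for _, name := range hashedASNames {
		if name == "" {
			continue
		}
		clauses = append(clauses, fmt.Sprintf("%s == $%s", field, name))
	}
	switch len(clauses) {
	case 0:
		return ""
	case 1:
		return clauses[0]
	default:
		return fmt.Sprintf("(%s)", strings.Join(clauses, " || "))
	}
}

func main() {
	// Prints: ip4.dst == $a712973235162149816
	fmt.Println(destMatch(false, "a712973235162149816"))
}

For an advertised network the gateway-router SNAT then combines this destination clause with the outport match on the GR port, as GetNetworkScopedClusterSubnetSNATMatch below does for Layer2 topologies.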
@@ -784,7 +795,7 @@ func (gw *GatewayManager) updateGWRouterNAT(nodeName string, clusterIPSubnet []* } else { // ensure we do not have any leftover SNAT entries after an upgrade for _, logicalSubnet := range clusterIPSubnet { - nat = libovsdbops.BuildSNATWithMatch(nil, logicalSubnet, "", extIDs, gw.netInfo.GetNetworkScopedClusterSubnetSNATMatch(nodeName)) + nat = libovsdbops.BuildSNAT(nil, logicalSubnet, "", extIDs) nats = append(nats, nat) } err = libovsdbops.DeleteNATs(gw.nbClient, gwRouter, nats...) @@ -902,6 +913,36 @@ func (gw *GatewayManager) gatewayInit( return nil } +// GetNetworkScopedClusterSubnetSNATMatch returns the match for the cluster subnet SNAT rule, both for the cluster default network +// and for L3/L2 user defined networks. +// If the network is not advertised: +// - For Layer2 topology, the match is on the GR output port to the external switch, since in L2 there is only 1 router but two cSNATs. +// - For Layer3 topology, the match is empty. +// If the network is advertised: +// - For Layer2 topology, the match is on the GR output port to the external switch and additionally requires the destination to be a nodeIP in the cluster. +// - For Layer3 topology, the match only requires the destination to be a nodeIP in the cluster. +func GetNetworkScopedClusterSubnetSNATMatch(nbClient libovsdbclient.Client, netInfo util.NetInfo, nodeName string, isNetworkAdvertised bool, ipFamily utilnet.IPFamily) (string, error) { + if !isNetworkAdvertised { + if netInfo.TopologyType() != types.Layer2Topology { + return "", nil + } + return fmt.Sprintf("outport == %q", types.GWRouterToExtSwitchPrefix+netInfo.GetNetworkScopedGWRouterName(nodeName)), nil + } else { + // if the network is advertised, we need to ensure that the SNAT exists with the correct conditional destination match + dbIDs := getEgressIPAddrSetDbIDs(NodeIPAddrSetName, types.DefaultNetworkName, DefaultNetworkControllerName) + addressSetFactory := addressset.NewOvnAddressSetFactory(nbClient, config.IPv4Mode, config.IPv6Mode) + addrSet, err := addressSetFactory.GetAddressSet(dbIDs) + if err != nil { + return "", fmt.Errorf("cannot ensure that addressSet %s exists %v", NodeIPAddrSetName, err) + } + destinationMatch := getClusterNodesDestinationBasedSNATMatch(ipFamily, addrSet) + if netInfo.TopologyType() != types.Layer2Topology { + return destinationMatch, nil + } + return fmt.Sprintf("outport == %q && (%s)", types.GWRouterToExtSwitchPrefix+netInfo.GetNetworkScopedGWRouterName(nodeName), destinationMatch), nil + } +} + // addExternalSwitch creates a switch connected to the external bridge and connects it to // the gateway router func (gw *GatewayManager) addExternalSwitch(prefix, interfaceID, gatewayRouter, macAddress, physNetworkName string, ipAddresses []*net.IPNet, vlanID *uint) error { diff --git a/go-controller/pkg/ovn/gateway_test.go b/go-controller/pkg/ovn/gateway_test.go index 61f89e831d..893d17ad09 100644 --- a/go-controller/pkg/ovn/gateway_test.go +++ b/go-controller/pkg/ovn/gateway_test.go @@ -65,6 +65,15 @@ func generateGatewayInitExpectedNB(testData []libovsdbtest.TestData, expectedOVN expectedNodeSwitch *nbdb.LogicalSwitch, nodeName string, clusterIPSubnets []*net.IPNet, hostSubnets []*net.IPNet, l3GatewayConfig *util.L3GatewayConfig, joinLRPIPs, defLRPIPs []*net.IPNet, skipSnat bool, nodeMgmtPortIP, gatewayMTU string) []libovsdbtest.TestData { + return generateGatewayInitExpectedNBWithPodNetworkAdvertised(testData, expectedOVNClusterRouter, expectedNodeSwitch, + nodeName, clusterIPSubnets, hostSubnets, l3GatewayConfig, joinLRPIPs,
defLRPIPs, skipSnat, nodeMgmtPortIP, + gatewayMTU, false) // Default to no pod network advertised +} + +func generateGatewayInitExpectedNBWithPodNetworkAdvertised(testData []libovsdbtest.TestData, expectedOVNClusterRouter *nbdb.LogicalRouter, + expectedNodeSwitch *nbdb.LogicalSwitch, nodeName string, clusterIPSubnets []*net.IPNet, hostSubnets []*net.IPNet, + l3GatewayConfig *util.L3GatewayConfig, joinLRPIPs, defLRPIPs []*net.IPNet, skipSnat bool, nodeMgmtPortIP, + gatewayMTU string, isPodNetworkAdvertised bool) []libovsdbtest.TestData { GRName := "GR_" + nodeName gwSwitchPort := types.JoinSwitchToGWRouterPrefix + GRName @@ -214,6 +223,16 @@ func generateGatewayInitExpectedNB(testData []libovsdbtest.TestData, expectedOVN }, Networks: networks, }) + var egressNodeIPsASv4, egressNodeIPsASv6 *nbdb.AddressSet + if config.OVNKubernetesFeature.EnableEgressIP { + egressNodeIPsASv4, egressNodeIPsASv6 = buildEgressIPNodeAddressSets(physicalIPs) + if config.IPv4Mode { + testData = append(testData, egressNodeIPsASv4) + } + if config.IPv6Mode { + testData = append(testData, egressNodeIPsASv6) + } + } natUUIDs := make([]string, 0, len(clusterIPSubnets)) if !skipSnat { @@ -231,6 +250,19 @@ func generateGatewayInitExpectedNB(testData []libovsdbtest.TestData, expectedOVN if config.Gateway.Mode != config.GatewayModeDisabled { nat.ExternalPortRange = config.DefaultEphemeralPortRange } + if isPodNetworkAdvertised { + // IPv6 pod network + if utilnet.IsIPv6CIDR(subnet) { + if egressNodeIPsASv6 != nil { + nat.Match = fmt.Sprintf("ip6.dst == $%s", egressNodeIPsASv6.Name) + } + } else { + // IPv4 pod network + if egressNodeIPsASv4 != nil { + nat.Match = fmt.Sprintf("ip4.dst == $%s", egressNodeIPsASv4.Name) + } + } + } testData = append(testData, &nat) } } diff --git a/go-controller/pkg/ovn/master_test.go b/go-controller/pkg/ovn/master_test.go index bc943ecf8b..8d46c281d3 100644 --- a/go-controller/pkg/ovn/master_test.go +++ b/go-controller/pkg/ovn/master_test.go @@ -963,6 +963,7 @@ var _ = ginkgo.Describe("Default network controller operations", func() { // Restore global default values before each testcase gomega.Expect(config.PrepareTestConfig()).To(gomega.Succeed()) fakeOvn = NewFakeOVN(true) + config.OVNKubernetesFeature.EnableEgressIP = true app = cli.NewApp() app.Name = "test" @@ -1043,6 +1044,19 @@ var _ = ginkgo.Describe("Default network controller operations", func() { l3GatewayConfig = node1.gatewayConfig(config.GatewayModeLocal, uint(vlanID)) err = util.SetL3GatewayConfig(nodeAnnotator, l3GatewayConfig) gomega.Expect(err).NotTo(gomega.HaveOccurred()) + if config.OVNKubernetesFeature.EnableEgressIP { + physicalIPs := []string{} + for _, ip := range l3GatewayConfig.IPAddresses { + physicalIPs = append(physicalIPs, ip.IP.String()) + } + egressNodeIPsASv4, egressNodeIPsASv6 := buildEgressIPNodeAddressSets(physicalIPs) + if config.IPv4Mode { + dbSetup.NBData = append(dbSetup.NBData, egressNodeIPsASv4) + } + if config.IPv6Mode { + dbSetup.NBData = append(dbSetup.NBData, egressNodeIPsASv6) + } + } err = util.UpdateNodeManagementPortMACAddresses(&testNode, nodeAnnotator, ovntest.MustParseMAC(node1.NodeMgmtPortMAC), types.DefaultNetworkName) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1245,10 +1259,17 @@ var _ = ginkgo.Describe("Default network controller operations", func() { newNodeSNAT("stale-nodeNAT-UUID-3", "10.0.0.3", Node1GatewayRouterIP), newNodeSNAT("stale-nodeNAT-UUID-4", "10.0.0.3", "172.16.16.3"), } + extraNatsWithMatch := []*nbdb.NAT{ // used for pod network advertised test + 
newNodeSNATWithMatch("stale-nodeNAT-UUID-1", "10.1.0.3", Node1GatewayRouterIP, "ip4.dst == $a712973235162149816"), + newNodeSNATWithMatch("stale-nodeNAT-UUID-2", "10.2.0.3", Node1GatewayRouterIP, "ip4.dst == $a712973235162149816"), + newNodeSNATWithMatch("stale-nodeNAT-UUID-3", "10.0.0.3", Node1GatewayRouterIP, "ip4.dst == $a712973235162149816"), + newNodeSNATWithMatch("stale-nodeNAT-UUID-4", "10.0.0.3", "172.16.16.3", "ip4.dst == $a712973235162149816"), + } ginkgo.DescribeTable( "reconciles pod network SNATs from syncGateway", func(condition func(*DefaultNetworkController) error, expectedExtraNATs ...*nbdb.NAT) { app.Action = func(ctx *cli.Context) error { + // Initialize config from CLI flags (including --init-gateways) _, err := config.InitConfig(ctx, nil, nil) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1258,6 +1279,10 @@ var _ = ginkgo.Describe("Default network controller operations", func() { _, err = fakeClient.KubeClient.CoreV1().Pods(ns.Name).Create(context.TODO(), &pod, metav1.CreateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // generate specific test conditions (after base config is set) + err = condition(oc) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // Let the real code run and ensure OVN database sync gomega.Expect(oc.WatchNodes()).To(gomega.Succeed()) @@ -1265,11 +1290,11 @@ var _ = ginkgo.Describe("Default network controller operations", func() { GR := &nbdb.LogicalRouter{ Name: types.GWRouterPrefix + node1.Name, } - err = libovsdbops.CreateOrUpdateNATs(nbClient, GR, extraNats...) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - // generate specific test conditions - err = condition(oc) + if !oc.isPodNetworkAdvertisedAtNode(node1.Name) { + err = libovsdbops.CreateOrUpdateNATs(nbClient, GR, extraNats...) + } else { + err = libovsdbops.CreateOrUpdateNATs(nbClient, GR, extraNatsWithMatch...) 
+ } gomega.Expect(err).NotTo(gomega.HaveOccurred()) // ensure the stale SNAT's are cleaned up @@ -1281,19 +1306,23 @@ var _ = ginkgo.Describe("Default network controller operations", func() { err = oc.syncNodeGateway(testNode) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - skipSnat := config.Gateway.DisableSNATMultipleGWs || oc.isPodNetworkAdvertisedAtNode(node1.Name) + skipSnat := config.Gateway.DisableSNATMultipleGWs && !oc.GetNetInfo().IsPrimaryNetwork() var clusterSubnets []*net.IPNet for _, clusterSubnet := range config.Default.ClusterSubnets { clusterSubnets = append(clusterSubnets, clusterSubnet.CIDR) } expectedNBDatabaseState = addNodeLogicalFlowsWithServiceController(nil, expectedOVNClusterRouter, expectedNodeSwitch, expectedClusterRouterPortGroup, expectedClusterPortGroup, &node1, oc.svcTemplateSupport) - expectedNBDatabaseState = generateGatewayInitExpectedNB(expectedNBDatabaseState, expectedOVNClusterRouter, - expectedNodeSwitch, node1.Name, clusterSubnets, []*net.IPNet{subnet}, l3GatewayConfig, - []*net.IPNet{classBIPAddress(node1.LrpIP)}, []*net.IPNet{classBIPAddress(node1.DrLrpIP)}, - skipSnat, node1.NodeMgmtPortIP, "1400") - - if oc.isPodNetworkAdvertisedAtNode(node1.Name) { + if !oc.isPodNetworkAdvertisedAtNode(node1.Name) { + expectedNBDatabaseState = generateGatewayInitExpectedNB(expectedNBDatabaseState, expectedOVNClusterRouter, + expectedNodeSwitch, node1.Name, clusterSubnets, []*net.IPNet{subnet}, l3GatewayConfig, + []*net.IPNet{classBIPAddress(node1.LrpIP)}, []*net.IPNet{classBIPAddress(node1.DrLrpIP)}, + skipSnat, node1.NodeMgmtPortIP, "1400") + } else { + expectedNBDatabaseState = generateGatewayInitExpectedNBWithPodNetworkAdvertised(expectedNBDatabaseState, expectedOVNClusterRouter, + expectedNodeSwitch, node1.Name, clusterSubnets, []*net.IPNet{subnet}, l3GatewayConfig, + []*net.IPNet{classBIPAddress(node1.LrpIP)}, []*net.IPNet{classBIPAddress(node1.DrLrpIP)}, + skipSnat, node1.NodeMgmtPortIP, "1400", true) addrSet, err := oc.addressSetFactory.GetAddressSet(GetAdvertisedNetworkSubnetsAddressSetDBIDs()) gomega.Expect(err).NotTo(gomega.HaveOccurred()) expectedNBDatabaseState = generateAdvertisedUDNIsolationExpectedNB(expectedNBDatabaseState, oc.GetNetworkName(), oc.GetNetworkID(), clusterSubnets, expectedNodeSwitch, addrSet) @@ -1347,17 +1376,21 @@ var _ = ginkgo.Describe("Default network controller operations", func() { mutableNetInfo.SetPodNetworkAdvertisedVRFs(map[string][]string{"node1": {"vrf"}}) return oc.Reconcile(mutableNetInfo) }, - newNodeSNAT("stale-nodeNAT-UUID-4", "10.0.0.3", "172.16.16.3"), // won't be deleted on this node but will be deleted on the node whose IP is 172.16.16.3 since this pod belongs to this node + // won't be deleted on this node since this pod belongs to node-1 and is advertised so we keep this SNAT + newNodeSNATWithMatch("stale-nodeNAT-UUID-3", "10.0.0.3", Node1GatewayRouterIP, "ip4.dst == $a712973235162149816"), + // won't be deleted on this node but will be deleted on the node whose IP is 172.16.16.3 since this pod belongs to node-1 + newNodeSNATWithMatch("stale-nodeNAT-UUID-4", "10.0.0.3", "172.16.16.3", "ip4.dst == $a712973235162149816"), ), ginkgo.Entry( "When pod network is advertised and DisableSNATMultipleGWs is false", func(oc *DefaultNetworkController) error { config.Gateway.DisableSNATMultipleGWs = false + config.OVNKubernetesFeature.EnableEgressIP = true mutableNetInfo := util.NewMutableNetInfo(oc.GetNetInfo()) mutableNetInfo.SetPodNetworkAdvertisedVRFs(map[string][]string{"node1": {"vrf"}}) return 
oc.Reconcile(mutableNetInfo) }, - newNodeSNAT("stale-nodeNAT-UUID-4", "10.0.0.3", "172.16.16.3"), // won't be deleted on this node but will be deleted on the node whose IP is 172.16.16.3 since this pod belongs to this node + newNodeSNATWithMatch("stale-nodeNAT-UUID-4", "10.0.0.3", "172.16.16.3", "ip4.dst == $a712973235162149816"), // won't be deleted on this node but will be deleted on the node whose IP is 172.16.16.3 since this pod belongs to this node ), ) @@ -1962,6 +1995,12 @@ func newNodeSNAT(uuid, logicalIP, externalIP string) *nbdb.NAT { } } +func newNodeSNATWithMatch(uuid, logicalIP, externalIP, match string) *nbdb.NAT { + nat := newNodeSNAT(uuid, logicalIP, externalIP) + nat.Match = match + return nat +} + func TestController_syncNodes(t *testing.T) { gomega.RegisterFailHandler(ginkgo.Fail) diff --git a/go-controller/pkg/ovn/namespace.go b/go-controller/pkg/ovn/namespace.go index 01f189228b..07282de4df 100644 --- a/go-controller/pkg/ovn/namespace.go +++ b/go-controller/pkg/ovn/namespace.go @@ -8,10 +8,12 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/klog/v2" + utilnet "k8s.io/utils/net" "github.com/ovn-kubernetes/libovsdb/ovsdb" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + libovsdbops "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/ops" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" utilerrors "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/errors" @@ -234,9 +236,41 @@ func (oc *DefaultNetworkController) updateNamespace(old, newer *corev1.Namespace if err != nil { errors = append(errors, err) } else { - if extIPs, err := getExternalIPsGR(oc.watchFactory, pod.Spec.NodeName); err != nil { - errors = append(errors, err) - } else if err = addOrUpdatePodSNAT(oc.nbClient, oc.GetNetworkScopedGWRouterName(pod.Spec.NodeName), extIPs, podAnnotation.IPs); err != nil { + // Helper function to handle the complex SNAT operations + handleSNATOps := func() error { + extIPs, err := getExternalIPsGR(oc.watchFactory, pod.Spec.NodeName) + if err != nil { + return err + } + + var ops []ovsdb.Operation + // Handle each pod IP individually since each IP family needs its own SNAT match + for _, podIP := range podAnnotation.IPs { + ipFamily := utilnet.IPv4 + if utilnet.IsIPv6CIDR(podIP) { + ipFamily = utilnet.IPv6 + } + snatMatch, err := GetNetworkScopedClusterSubnetSNATMatch(oc.nbClient, oc.GetNetInfo(), pod.Spec.NodeName, oc.isPodNetworkAdvertisedAtNode(pod.Spec.NodeName), ipFamily) + if err != nil { + return fmt.Errorf("failed to get SNAT match for node %s for network %s: %v", pod.Spec.NodeName, oc.GetNetworkName(), err) + } + ops, err = addOrUpdatePodSNATOps(oc.nbClient, oc.GetNetworkScopedGWRouterName(pod.Spec.NodeName), extIPs, []*net.IPNet{podIP}, snatMatch, ops) + if err != nil { + return err + } + } + + // Execute all operations in a single transaction + if len(ops) > 0 { + _, err = libovsdbops.TransactAndCheck(oc.nbClient, ops) + if err != nil { + return fmt.Errorf("failed to update SNAT for pod %s on router %s: %v", pod.Name, oc.GetNetworkScopedGWRouterName(pod.Spec.NodeName), err) + } + } + return nil + } + + if err := handleSNATOps(); err != nil { errors = append(errors, err) } } diff --git a/go-controller/pkg/ovn/pods.go b/go-controller/pkg/ovn/pods.go index 5c3478f3cb..0ad9442e3e 100644 --- a/go-controller/pkg/ovn/pods.go +++ b/go-controller/pkg/ovn/pods.go @@ -12,6 +12,7 @@ import ( corev1 "k8s.io/api/core/v1" ktypes 
"k8s.io/apimachinery/pkg/types" "k8s.io/klog/v2" + utilnet "k8s.io/utils/net" "github.com/ovn-kubernetes/libovsdb/ovsdb" @@ -310,13 +311,26 @@ func (oc *DefaultNetworkController) addLogicalPort(pod *corev1.Pod) (err error) if err != nil { return err } - } else if config.Gateway.DisableSNATMultipleGWs && !oc.isPodNetworkAdvertisedAtNode(pod.Spec.NodeName) { + } else if config.Gateway.DisableSNATMultipleGWs { // Add NAT rules to pods if disable SNAT is set and does not have // namespace annotations to go through external egress router if extIPs, err := getExternalIPsGR(oc.watchFactory, pod.Spec.NodeName); err != nil { return err - } else if ops, err = addOrUpdatePodSNATOps(oc.nbClient, oc.GetNetworkScopedGWRouterName(pod.Spec.NodeName), extIPs, podAnnotation.IPs, ops); err != nil { - return err + } else { + // Handle each pod IP individually since each IP family needs its own SNAT match + for _, podIP := range podAnnotation.IPs { + ipFamily := utilnet.IPv4 + if utilnet.IsIPv6CIDR(podIP) { + ipFamily = utilnet.IPv6 + } + snatMatch, err := GetNetworkScopedClusterSubnetSNATMatch(oc.nbClient, oc.GetNetInfo(), pod.Spec.NodeName, oc.isPodNetworkAdvertisedAtNode(pod.Spec.NodeName), ipFamily) + if err != nil { + return fmt.Errorf("failed to get SNAT match for node %s for network %s: %v", pod.Spec.NodeName, oc.GetNetworkName(), err) + } + if ops, err = addOrUpdatePodSNATOps(oc.nbClient, oc.GetNetworkScopedGWRouterName(pod.Spec.NodeName), extIPs, []*net.IPNet{podIP}, snatMatch, ops); err != nil { + return err + } + } } } diff --git a/go-controller/pkg/ovn/secondary_layer2_network_controller.go b/go-controller/pkg/ovn/secondary_layer2_network_controller.go index 7ce63fc278..dacf37d090 100644 --- a/go-controller/pkg/ovn/secondary_layer2_network_controller.go +++ b/go-controller/pkg/ovn/secondary_layer2_network_controller.go @@ -575,36 +575,40 @@ func (oc *SecondaryLayer2NetworkController) addUpdateLocalNodeEvent(node *corev1 gwManager := oc.gatewayManagerForNode(node.Name) oc.gatewayManagers.Store(node.Name, gwManager) - gwConfig, err := oc.nodeGatewayConfig(node) - if err != nil { - errs = append(errs, err) - oc.gatewaysFailed.Store(node.Name, true) - } else { + err := func() error { + gwConfig, err := oc.nodeGatewayConfig(node) + if err != nil { + return err + } if err := gwManager.SyncGateway( node, gwConfig, ); err != nil { - errs = append(errs, err) - oc.gatewaysFailed.Store(node.Name, true) - } else { - if !util.IsPodNetworkAdvertisedAtNode(oc, node.Name) { - err = oc.addUDNClusterSubnetEgressSNAT(gwConfig.hostSubnets, gwManager.gwRouterName) - if err == nil && util.IsRouteAdvertisementsEnabled() { - err = oc.deleteAdvertisedNetworkIsolation(node.Name) - } - } else { - err = oc.deleteUDNClusterSubnetEgressSNAT(gwConfig.hostSubnets, gwManager.gwRouterName) - if err == nil { - err = oc.addAdvertisedNetworkIsolation(node.Name) + return err + } + isUDNAdvertised := util.IsPodNetworkAdvertisedAtNode(oc, node.Name) + err = oc.addOrUpdateUDNClusterSubnetEgressSNAT(gwConfig.hostSubnets, gwManager.gwRouterName, isUDNAdvertised) + if err != nil { + return err + } + if !isUDNAdvertised { + if util.IsRouteAdvertisementsEnabled() { + if err = oc.deleteAdvertisedNetworkIsolation(node.Name); err != nil { + return err } } - if err != nil { - errs = append(errs, err) - oc.gatewaysFailed.Store(node.Name, true) - } else { - oc.gatewaysFailed.Delete(node.Name) + } else { + if err = oc.addAdvertisedNetworkIsolation(node.Name); err != nil { + return err } } + oc.gatewaysFailed.Delete(node.Name) + return nil + }() + + 
if err != nil { + errs = append(errs, err) + oc.gatewaysFailed.Store(node.Name, true) } } @@ -741,7 +745,8 @@ func (oc *SecondaryLayer2NetworkController) deleteNodeEvent(node *corev1.Node) e return nil } -// addUDNClusterSubnetEgressSNAT adds the SNAT on each node's GR in L2 networks +// addOrUpdateUDNClusterSubnetEgressSNAT adds or updates the SNAT on each node's GR in L2 networks for each UDN +// Based on the isUDNAdvertised flag, the SNAT matches are slightly different // snat eth.dst == d6:cf:fd:2c:a6:44 169.254.0.12 10.128.0.0/14 // snat eth.dst == d6:cf:fd:2c:a6:44 169.254.0.12 2010:100:200::/64 // these SNATs are required for pod2Egress traffic in LGW mode and pod2SameNode traffic in SGW mode to function properly on UDNs @@ -751,9 +756,12 @@ func (oc *SecondaryLayer2NetworkController) deleteNodeEvent(node *corev1.Node) e // externalIP = "169.254.0.12"; which is the masqueradeIP for this L2 UDN // so all in all we want to condionally SNAT all packets that are coming from pods hosted on this node, // which are leaving via UDN's mpX interface to the UDN's masqueradeIP. -func (oc *SecondaryLayer2NetworkController) addUDNClusterSubnetEgressSNAT(localPodSubnets []*net.IPNet, gwRouterName string) error { +// If isUDNAdvertised is true, then we also want to SNAT all packets that are coming from pods on this network +// leaving towards nodeIPs in the cluster to the masqueradeIP. If the network is advertised, the SNAT looks like this: +// "eth.dst == 0a:58:5d:5d:00:02 && (ip4.dst == $a712973235162149816)" "169.254.0.36" "93.93.0.0/16" +func (oc *SecondaryLayer2NetworkController) addOrUpdateUDNClusterSubnetEgressSNAT(localPodSubnets []*net.IPNet, gwRouterName string, isUDNAdvertised bool) error { outputPort := types.GWRouterToJoinSwitchPrefix + gwRouterName - nats, err := oc.buildUDNEgressSNAT(localPodSubnets, outputPort) + nats, err := oc.buildUDNEgressSNAT(localPodSubnets, outputPort, isUDNAdvertised) if err != nil { return err } @@ -770,25 +778,6 @@ func (oc *SecondaryLayer2NetworkController) deleteNodeEvent(node *corev1.Node) e return nil } -func (oc *SecondaryLayer2NetworkController) deleteUDNClusterSubnetEgressSNAT(localPodSubnets []*net.IPNet, routerName string) error { - outputPort := types.GWRouterToJoinSwitchPrefix + routerName - nats, err := oc.buildUDNEgressSNAT(localPodSubnets, outputPort) - if err != nil { - return err - } - if len(nats) == 0 { - return nil // nothing to do - } - router := &nbdb.LogicalRouter{ - Name: routerName, - } - if err := libovsdbops.DeleteNATs(oc.nbClient, router, nats...); err != nil { - return fmt.Errorf("failed to delete SNAT for cluster on router: %q for network %q, error: %w", - routerName, oc.GetNetworkName(), err) - } - return nil -} - func (oc *SecondaryLayer2NetworkController) nodeGatewayConfig(node *corev1.Node) (*GatewayConfig, error) { l3GatewayConfig, err := util.ParseNodeL3GatewayAnnotation(node) if err != nil { diff --git a/go-controller/pkg/ovn/secondary_layer3_network_controller.go b/go-controller/pkg/ovn/secondary_layer3_network_controller.go index b2355b9100..e9745fe9b2 100644 --- a/go-controller/pkg/ovn/secondary_layer3_network_controller.go +++ b/go-controller/pkg/ovn/secondary_layer3_network_controller.go @@ -857,7 +857,8 @@ func (oc *SecondaryLayer3NetworkController) addUpdateRemoteNodeEvent(node *corev return err } -// addNodeSubnetEgressSNAT adds the SNAT on each node's ovn-cluster-router in L3 networks +// addOrUpdateUDNNodeSubnetEgressSNAT adds or updates the SNAT on each node's ovn-cluster-router in L3 networks for each UDN +// Based on 
the isUDNAdvertised flag, the SNAT matches are slightly different // snat eth.dst == d6:cf:fd:2c:a6:44 169.254.0.12 10.128.0.0/24 // snat eth.dst == d6:cf:fd:2c:a6:44 169.254.0.12 2010:100:200::/64 // these SNATs are required for pod2Egress traffic in LGW mode and pod2SameNode traffic in SGW mode to function properly on UDNs @@ -867,9 +868,12 @@ func (oc *SecondaryLayer3NetworkController) addUpdateRemoteNodeEvent(node *corev // externalIP = "169.254.0.12"; which is the masqueradeIP for this L3 UDN // so all in all we want to condionally SNAT all packets that are coming from pods hosted on this node, // which are leaving via UDN's mpX interface to the UDN's masqueradeIP. -func (oc *SecondaryLayer3NetworkController) addUDNNodeSubnetEgressSNAT(localPodSubnets []*net.IPNet, node *corev1.Node) error { +// If isUDNAdvertised is true, then we also want to SNAT all packets that are coming from pods on this network +// leaving towards nodeIPs in the cluster to the masqueradeIP. If the network is advertised, the SNAT looks like this: +// "eth.dst == 0a:58:5d:5d:00:02 && (ip4.dst == $a712973235162149816)" "169.254.0.36" "93.93.0.0/24" +func (oc *SecondaryLayer3NetworkController) addOrUpdateUDNNodeSubnetEgressSNAT(localPodSubnets []*net.IPNet, node *corev1.Node, isUDNAdvertised bool) error { outputPort := types.RouterToSwitchPrefix + oc.GetNetworkScopedName(node.Name) - nats, err := oc.buildUDNEgressSNAT(localPodSubnets, outputPort) + nats, err := oc.buildUDNEgressSNAT(localPodSubnets, outputPort, isUDNAdvertised) if err != nil { return fmt.Errorf("failed to build UDN masquerade SNATs for network %q on node %q, err: %w", oc.GetNetworkName(), node.Name, err) @@ -887,28 +891,6 @@ func (oc *SecondaryLayer3NetworkController) addUpdateRemoteNodeEvent(node *corev return nil } -// deleteUDNNodeSubnetEgressSNAT deletes SNAT rule from network specific -// ovn_cluster_router depending on whether the network is advertised or not -func (oc *SecondaryLayer3NetworkController) deleteUDNNodeSubnetEgressSNAT(localPodSubnets []*net.IPNet, node *corev1.Node) error { - outputPort := types.RouterToSwitchPrefix + oc.GetNetworkScopedName(node.Name) - nats, err := oc.buildUDNEgressSNAT(localPodSubnets, outputPort) - if err != nil { - return fmt.Errorf("failed to build UDN masquerade SNATs for network %q on node %q, err: %w", - oc.GetNetworkName(), node.Name, err) - } - if len(nats) == 0 { - return nil // nothing to do - } - router := &nbdb.LogicalRouter{ - Name: oc.GetNetworkScopedClusterRouterName(), - } - if err := libovsdbops.DeleteNATs(oc.nbClient, router, nats...); err != nil { - return fmt.Errorf("failed to delete SNAT for node subnet on router: %q for network %q, error: %w", - oc.GetNetworkScopedClusterRouterName(), oc.GetNetworkName(), err) - } - return nil -} - func (oc *SecondaryLayer3NetworkController) addNode(node *corev1.Node) ([]*net.IPNet, error) { // Node subnet for the secondary layer3 network is allocated by cluster manager.
// Make sure that the node is allocated with the subnet before proceeding @@ -923,19 +905,17 @@ func (oc *SecondaryLayer3NetworkController) addNode(node *corev1.Node) ([]*net.I return nil, err } if util.IsNetworkSegmentationSupportEnabled() && oc.IsPrimaryNetwork() { - if !util.IsPodNetworkAdvertisedAtNode(oc, node.Name) { - if err := oc.addUDNNodeSubnetEgressSNAT(hostSubnets, node); err != nil { - return nil, err - } + isUDNAdvertised := util.IsPodNetworkAdvertisedAtNode(oc, node.Name) + if err := oc.addOrUpdateUDNNodeSubnetEgressSNAT(hostSubnets, node, isUDNAdvertised); err != nil { + return nil, err + } + if !isUDNAdvertised { if util.IsRouteAdvertisementsEnabled() { if err := oc.deleteAdvertisedNetworkIsolation(node.Name); err != nil { return nil, err } } } else { - if err := oc.deleteUDNNodeSubnetEgressSNAT(hostSubnets, node); err != nil { - return nil, err - } if err := oc.addAdvertisedNetworkIsolation(node.Name); err != nil { return nil, err } diff --git a/go-controller/pkg/types/const.go b/go-controller/pkg/types/const.go index 8ba7269cad..523da8e27b 100644 --- a/go-controller/pkg/types/const.go +++ b/go-controller/pkg/types/const.go @@ -312,13 +312,13 @@ const ( // CUDNPrefix of all CUDN network names CUDNPrefix = "cluster_udn_" - // NFTNoPMTUDRemoteNodeIPsv4 is a set used to track remote node IPs that do not belong to + // NFTRemoteNodeIPsv4 is a set used to track remote node v4IPs that do not belong to // the local node's subnet. - NFTNoPMTUDRemoteNodeIPsv4 = "no-pmtud-remote-node-ips-v4" + NFTRemoteNodeIPsv4 = "remote-node-ips-v4" - // NFTNoPMTUDRemoteNodeIPsv6 is a set used to track remote node IPs that do not belong to + // NFTRemoteNodeIPsv6 is a set used to track remote node v6IPs that do not belong to // the local node's subnet. - NFTNoPMTUDRemoteNodeIPsv6 = "no-pmtud-remote-node-ips-v6" + NFTRemoteNodeIPsv6 = "remote-node-ips-v6" // Metrics MetricOvnkubeNamespace = "ovnkube" diff --git a/go-controller/pkg/util/multi_network.go b/go-controller/pkg/util/multi_network.go index fd91edd3be..f2e8b1d22d 100644 --- a/go-controller/pkg/util/multi_network.go +++ b/go-controller/pkg/util/multi_network.go @@ -82,7 +82,6 @@ type NetInfo interface { GetNetworkScopedExtPortName(bridgeID, nodeName string) string GetNetworkScopedLoadBalancerName(lbName string) string GetNetworkScopedLoadBalancerGroupName(lbGroupName string) string - GetNetworkScopedClusterSubnetSNATMatch(nodeName string) string // GetNetInfo is an identity method used to get the specific NetInfo // implementation @@ -543,10 +542,6 @@ func (nInfo *DefaultNetInfo) GetNetworkScopedLoadBalancerGroupName(lbGroupName s return nInfo.GetNetworkScopedName(lbGroupName) } -func (nInfo *DefaultNetInfo) GetNetworkScopedClusterSubnetSNATMatch(_ string) string { - return "" -} - func (nInfo *DefaultNetInfo) canReconcile(netInfo NetInfo) bool { _, ok := netInfo.(*DefaultNetInfo) return ok @@ -738,13 +733,6 @@ func (nInfo *secondaryNetInfo) GetNetworkScopedLoadBalancerGroupName(lbGroupName return nInfo.GetNetworkScopedName(lbGroupName) } -func (nInfo *secondaryNetInfo) GetNetworkScopedClusterSubnetSNATMatch(nodeName string) string { - if nInfo.TopologyType() != types.Layer2Topology { - return "" - } - return fmt.Sprintf("outport == %q", types.GWRouterToExtSwitchPrefix+nInfo.GetNetworkScopedGWRouterName(nodeName)) -} - // getPrefix returns if the logical entities prefix for this network func (nInfo *secondaryNetInfo) getPrefix() string { return GetSecondaryNetworkPrefix(nInfo.netName) @@ -1000,8 +988,7 @@ func parseSubnets(subnetsString, 
excludeSubnetsString, topology string) ([]confi } } if !found { - return nil, nil, fmt.Errorf("the provided network subnets %v do not contain exluded subnets %v", - subnets, excludeSubnet.CIDR) + return nil, nil, config.NewExcludedSubnetNotContainedError(excludeSubnet.CIDR) } excludeIPNets = append(excludeIPNets, excludeSubnet.CIDR) } @@ -1258,7 +1245,7 @@ func subnetOverlapCheck(netconf *ovncnitypes.NetConf) error { } err = allSubnets.CheckForOverlaps() if err != nil { - return fmt.Errorf("pod or join subnet overlaps with already configured internal subnets: %v", err) + return fmt.Errorf("pod or join subnet overlaps with already configured internal subnets: %w", err) } return nil @@ -1321,6 +1308,21 @@ func GetPodNADToNetworkMapping(pod *corev1.Pod, nInfo NetInfo) (bool, map[string return true, networkSelections, nil } +// overrideActiveNSEWithDefaultNSE overrides the provided active NetworkSelectionElement with the IP and MAC requests from +// the default NetworkSelectionElement after validating its namespace and name. +func overrideActiveNSEWithDefaultNSE(defaultNSE, activeNSE *nettypes.NetworkSelectionElement) error { + if defaultNSE.Namespace != config.Kubernetes.OVNConfigNamespace { + return fmt.Errorf("unexpected default NSE namespace %q, expected %q", defaultNSE.Namespace, config.Kubernetes.OVNConfigNamespace) + } + if defaultNSE.Name != types.DefaultNetworkName { + return fmt.Errorf("unexpected default NSE name %q, expected %q", defaultNSE.Name, types.DefaultNetworkName) + } + activeNSE.IPRequest = defaultNSE.IPRequest + activeNSE.MacRequest = defaultNSE.MacRequest + activeNSE.IPAMClaimReference = defaultNSE.IPAMClaimReference + return nil +} + // GetPodNADToNetworkMappingWithActiveNetwork will call `GetPodNADToNetworkMapping` passing "nInfo" which correspond // to the NetInfo representing the NAD, the resulting NetworkSelectingElements will be decorated with the ones // from found active network @@ -1349,18 +1351,39 @@ func GetPodNADToNetworkMappingWithActiveNetwork(pod *corev1.Pod, nInfo NetInfo, if len(networkSelections) == 0 { networkSelections = map[string]*nettypes.NetworkSelectionElement{} } - networkSelections[activeNetworkNADs[0]] = &nettypes.NetworkSelectionElement{ + + activeNSE := &nettypes.NetworkSelectionElement{ Namespace: activeNetworkNADKey[0], Name: activeNetworkNADKey[1], } - if nInfo.IsPrimaryNetwork() && AllowsPersistentIPs(nInfo) { + // Feature gate integration: EnablePreconfiguredUDNAddresses controls default network IP/MAC transfer to active network + if IsPreconfiguredUDNAddressesEnabled() { + // Limit the static ip and mac requests to the layer2 primary UDN when EnablePreconfiguredUDNAddresses is enabled, we + // don't need to explicitly check this is primary UDN since + // the "active network" concept is exactly that. 
+ if activeNetwork.TopologyType() == types.Layer2Topology { + defaultNSE, err := GetK8sPodDefaultNetworkSelection(pod) + if err != nil { + return false, nil, fmt.Errorf("failed getting default-network annotation for pod %q: %w", pod.Namespace+"/"+pod.Name, err) + } + // If there are static IPs and MACs at the default NSE, override the active NSE with them + if defaultNSE != nil { + if err := overrideActiveNSEWithDefaultNSE(defaultNSE, activeNSE); err != nil { + return false, nil, err + } + } + } + } + + if nInfo.IsPrimaryNetwork() && AllowsPersistentIPs(nInfo) && activeNSE.IPAMClaimReference == "" { ipamClaimName, wasPersistentIPRequested := pod.Annotations[OvnUDNIPAMClaimName] if wasPersistentIPRequested { - networkSelections[activeNetworkNADs[0]].IPAMClaimReference = ipamClaimName + activeNSE.IPAMClaimReference = ipamClaimName } } + networkSelections[activeNetworkNADs[0]] = activeNSE return true, networkSelections, nil } @@ -1378,6 +1401,12 @@ func IsRouteAdvertisementsEnabled() bool { return config.OVNKubernetesFeature.EnableMultiNetwork && config.OVNKubernetesFeature.EnableRouteAdvertisements } +// IsPreconfiguredUDNAddressesEnabled indicates if user defined IPs / MAC +// addresses can be set in primary UDNs +func IsPreconfiguredUDNAddressesEnabled() bool { + return IsNetworkSegmentationSupportEnabled() && config.OVNKubernetesFeature.EnablePreconfiguredUDNAddresses +} + func DoesNetworkRequireIPAM(netInfo NetInfo) bool { return !((netInfo.TopologyType() == types.Layer2Topology || netInfo.TopologyType() == types.LocalnetTopology) && len(netInfo.Subnets()) == 0) } diff --git a/go-controller/pkg/util/multi_network_test.go b/go-controller/pkg/util/multi_network_test.go index daaaf920a5..6b2220ef25 100644 --- a/go-controller/pkg/util/multi_network_test.go +++ b/go-controller/pkg/util/multi_network_test.go @@ -862,6 +862,7 @@ func TestGetPodNADToNetworkMappingWithActiveNetwork(t *testing.T) { expectedError error expectedIsAttachmentRequested bool expectedNetworkSelectionElements map[string]*nadv1.NetworkSelectionElement + enablePreconfiguredUDNAddresses bool } tests := []testConfig{ @@ -1011,10 +1012,143 @@ func TestGetPodNADToNetworkMappingWithActiveNetwork(t *testing.T) { }, }, }, + { + desc: "the network configuration for a primary layer2 UDN receive pod requesting IP, MAC and IPAMClaimRef on default network annotation for it", + inputNetConf: &ovncnitypes.NetConf{ + NetConf: cnitypes.NetConf{Name: networkName}, + Topology: ovntypes.Layer2Topology, + NADName: GetNADName(namespaceName, attachmentName), + Role: ovntypes.NetworkRolePrimary, + }, + inputPrimaryUDNConfig: &ovncnitypes.NetConf{ + NetConf: cnitypes.NetConf{Name: networkName}, + Topology: ovntypes.Layer2Topology, + NADName: GetNADName(namespaceName, attachmentName), + Role: ovntypes.NetworkRolePrimary, + }, + inputPodAnnotations: map[string]string{ + nadv1.NetworkAttachmentAnnot: GetNADName(namespaceName, "another-network"), + DefNetworkAnnotation: `[{"namespace": "ovn-kubernetes", "name": "default", "ips": ["192.168.0.3/24", "fda6::3/48"], "mac": "aa:bb:cc:dd:ee:ff", "ipam-claim-reference": "my-ipam-claim"}]`, + }, + expectedIsAttachmentRequested: true, + expectedNetworkSelectionElements: map[string]*nadv1.NetworkSelectionElement{ + "ns1/attachment1": { + Name: "attachment1", + Namespace: "ns1", + IPRequest: []string{"192.168.0.3/24", "fda6::3/48"}, + MacRequest: "aa:bb:cc:dd:ee:ff", + IPAMClaimReference: "my-ipam-claim", + }, + }, + enablePreconfiguredUDNAddresses: true, + }, + { + desc: "the network configuration for a primary 
layer2 UDN receive pod requesting IP and MAC on default network annotation for it, but with unexpected namespace", + inputNetConf: &ovncnitypes.NetConf{ + NetConf: cnitypes.NetConf{Name: networkName}, + Topology: ovntypes.Layer2Topology, + NADName: GetNADName(namespaceName, attachmentName), + Role: ovntypes.NetworkRolePrimary, + }, + inputPrimaryUDNConfig: &ovncnitypes.NetConf{ + NetConf: cnitypes.NetConf{Name: networkName}, + Topology: ovntypes.Layer2Topology, + NADName: GetNADName(namespaceName, attachmentName), + Role: ovntypes.NetworkRolePrimary, + }, + inputPodAnnotations: map[string]string{ + DefNetworkAnnotation: `[{"namespace": "other-namespace", "name": "default", "ips": ["192.168.0.3/24", "fda6::3/48"], "mac": "aa:bb:cc:dd:ee:ff"}]`, + }, + enablePreconfiguredUDNAddresses: true, + expectedError: fmt.Errorf(`unexpected default NSE namespace "other-namespace", expected "ovn-kubernetes"`), + }, + { + desc: "the network configuration for a primary layer2 UDN receive pod requesting IP and MAC on default network annotation for it, but with unexpected name", + inputNetConf: &ovncnitypes.NetConf{ + NetConf: cnitypes.NetConf{Name: networkName}, + Topology: ovntypes.Layer2Topology, + NADName: GetNADName(namespaceName, attachmentName), + Role: ovntypes.NetworkRolePrimary, + }, + inputPrimaryUDNConfig: &ovncnitypes.NetConf{ + NetConf: cnitypes.NetConf{Name: networkName}, + Topology: ovntypes.Layer2Topology, + NADName: GetNADName(namespaceName, attachmentName), + Role: ovntypes.NetworkRolePrimary, + }, + inputPodAnnotations: map[string]string{ + DefNetworkAnnotation: `[{"namespace": "ovn-kubernetes", "name": "unexpected-name", "ips": ["192.168.0.3/24", "fda6::3/48"], "mac": "aa:bb:cc:dd:ee:ff"}]`, + }, + enablePreconfiguredUDNAddresses: true, + expectedError: fmt.Errorf(`unexpected default NSE name "unexpected-name", expected "default"`), + }, + + { + desc: "default-network ips and mac are ignored for Layer3 topology", + inputNetConf: &ovncnitypes.NetConf{ + NetConf: cnitypes.NetConf{Name: networkName}, + Topology: ovntypes.Layer3Topology, + NADName: GetNADName(namespaceName, attachmentName), + Role: ovntypes.NetworkRolePrimary, + }, + inputPrimaryUDNConfig: &ovncnitypes.NetConf{ + NetConf: cnitypes.NetConf{Name: networkName}, + Topology: ovntypes.Layer3Topology, + NADName: GetNADName(namespaceName, attachmentName), + Role: ovntypes.NetworkRolePrimary, + }, + inputPodAnnotations: map[string]string{ + nadv1.NetworkAttachmentAnnot: GetNADName(namespaceName, "another-network"), + DefNetworkAnnotation: `[{"namespace": "ovn-kubernetes", "name": "default", "ips": ["192.168.0.3/24", "fda6::3/48"], "mac": "aa:bb:cc:dd:ee:ff"}]`, + }, + expectedIsAttachmentRequested: true, + expectedNetworkSelectionElements: map[string]*nadv1.NetworkSelectionElement{ + "ns1/attachment1": { + Name: "attachment1", + Namespace: "ns1", + IPRequest: nil, + MacRequest: "", + }, + }, + enablePreconfiguredUDNAddresses: true, + }, + { + desc: "default-network with bad format", + inputNetConf: &ovncnitypes.NetConf{ + NetConf: cnitypes.NetConf{Name: networkName}, + Topology: ovntypes.Layer2Topology, + NADName: GetNADName(namespaceName, attachmentName), + Role: ovntypes.NetworkRolePrimary, + }, + inputPrimaryUDNConfig: &ovncnitypes.NetConf{ + NetConf: cnitypes.NetConf{Name: networkName}, + Topology: ovntypes.Layer2Topology, + NADName: GetNADName(namespaceName, attachmentName), + Role: ovntypes.NetworkRolePrimary, + }, + inputPodAnnotations: map[string]string{ + nadv1.NetworkAttachmentAnnot: GetNADName(namespaceName, 
"another-network"), + DefNetworkAnnotation: `[{"foo}`, + }, + enablePreconfiguredUDNAddresses: true, + expectedError: fmt.Errorf(`failed getting default-network annotation for pod "/test-pod": %w`, fmt.Errorf(`GetK8sPodDefaultNetwork: failed to parse CRD object: parsePodNetworkAnnotation: failed to parse pod Network Attachment Selection Annotation JSON format: unexpected end of JSON input`)), + }, } for _, test := range tests { t.Run(test.desc, func(t *testing.T) { g := gomega.NewWithT(t) + + t.Cleanup(func() { + _ = config.PrepareTestConfig() + }) + + // Set custom network config based on test requirements + config.OVNKubernetesFeature.EnablePreconfiguredUDNAddresses = test.enablePreconfiguredUDNAddresses + if test.enablePreconfiguredUDNAddresses { + config.OVNKubernetesFeature.EnableMultiNetwork = true + config.OVNKubernetesFeature.EnableNetworkSegmentation = true + } + netInfo, err := NewNetInfo(test.inputNetConf) g.Expect(err).ToNot(gomega.HaveOccurred()) if test.inputNetConf.NADName != "" { @@ -1048,11 +1182,14 @@ func TestGetPodNADToNetworkMappingWithActiveNetwork(t *testing.T) { primaryUDNNetInfo, ) - if err != nil { + if test.expectedError != nil { + g.Expect(err).To(gomega.HaveOccurred(), "unexpected success operation, epecting error") g.Expect(err).To(gomega.MatchError(test.expectedError)) + } else { + g.Expect(err).ToNot(gomega.HaveOccurred()) + g.Expect(isAttachmentRequested).To(gomega.Equal(test.expectedIsAttachmentRequested)) + g.Expect(networkSelectionElements).To(gomega.Equal(test.expectedNetworkSelectionElements)) } - g.Expect(isAttachmentRequested).To(gomega.Equal(test.expectedIsAttachmentRequested)) - g.Expect(networkSelectionElements).To(gomega.Equal(test.expectedNetworkSelectionElements)) }) } } @@ -1088,8 +1225,9 @@ func TestSubnetOverlapCheck(t *testing.T) { "netAttachDefName": "ns1/nad1" } `, - expectedError: fmt.Errorf("invalid subnet configuration: pod or join subnet overlaps with already configured internal subnets: " + - "illegal network configuration: user defined subnet \"10.129.0.0/16\" overlaps cluster subnet \"10.128.0.0/14\""), + expectedError: config.NewSubnetOverlapError( + config.ConfigSubnet{SubnetType: config.UserDefinedSubnets, Subnet: MustParseCIDR("10.129.0.0/16")}, + config.ConfigSubnet{SubnetType: config.ConfigSubnetCluster, Subnet: cidr4}), }, { desc: "return error when IPv4 join subnet in net-attach-def overlaps other subnets", @@ -1104,8 +1242,9 @@ func TestSubnetOverlapCheck(t *testing.T) { "netAttachDefName": "ns1/nad1" } `, - expectedError: fmt.Errorf("invalid subnet configuration: pod or join subnet overlaps with already configured internal subnets: " + - "illegal network configuration: user defined join subnet \"100.64.0.0/24\" overlaps built-in join subnet \"100.64.0.0/16\""), + expectedError: config.NewSubnetOverlapError( + config.ConfigSubnet{SubnetType: config.UserDefinedJoinSubnet, Subnet: MustParseCIDR("100.64.0.0/24")}, + config.ConfigSubnet{SubnetType: config.ConfigSubnetJoin, Subnet: MustParseCIDR(config.Gateway.V4JoinSubnet)}), }, { desc: "return error when IPv6 POD subnet in net-attach-def overlaps other subnets", @@ -1120,8 +1259,10 @@ func TestSubnetOverlapCheck(t *testing.T) { "netAttachDefName": "ns1/nad1" } `, - expectedError: fmt.Errorf("invalid subnet configuration: pod or join subnet overlaps with already configured internal subnets: " + - "illegal network configuration: user defined subnet \"fe01::/24\" overlaps service subnet \"fe01::/16\""), + expectedError: config.NewSubnetOverlapError( + 
config.ConfigSubnet{SubnetType: config.UserDefinedSubnets, Subnet: MustParseCIDR("fe01::/24")}, + config.ConfigSubnet{SubnetType: config.ConfigSubnetService, Subnet: svcCidr6}, + ), }, { desc: "return error when IPv6 join subnet in net-attach-def overlaps other subnets", @@ -1136,8 +1277,10 @@ func TestSubnetOverlapCheck(t *testing.T) { "netAttachDefName": "ns1/nad1" } `, - expectedError: fmt.Errorf("invalid subnet configuration: pod or join subnet overlaps with already configured internal subnets: " + - "illegal network configuration: user defined join subnet \"fd69::/112\" overlaps masquerade subnet \"fd69::/125\""), + expectedError: config.NewSubnetOverlapError( + config.ConfigSubnet{SubnetType: config.UserDefinedJoinSubnet, Subnet: MustParseCIDR("fd69::/112")}, + config.ConfigSubnet{SubnetType: config.ConfigSubnetMasquerade, Subnet: MustParseCIDR(config.Gateway.V6MasqueradeSubnet)}, + ), }, { desc: "excluded subnet should not be considered for overlap check", @@ -1177,7 +1320,7 @@ func TestSubnetOverlapCheck(t *testing.T) { }) if test.expectedError != nil { _, err := ParseNADInfo(networkAttachmentDefinition) - g.Expect(err).To(gomega.MatchError(test.expectedError.Error())) + g.Expect(err).To(gomega.MatchError(gomega.ContainSubstring(test.expectedError.Error()))) } else { _, err := ParseNADInfo(networkAttachmentDefinition) g.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1261,10 +1404,10 @@ func TestNewNetInfo(t *testing.T) { config.IPv6Mode = test.ipv6Cluster g := gomega.NewWithT(t) _, err := NewNetInfo(inputNetConf) - if test.expectedError == nil { - g.Expect(err).ToNot(gomega.HaveOccurred()) + if test.expectedError != nil { + g.Expect(err).To(gomega.MatchError(test.expectedError), "should return an error for invalid network configuration") } else { - g.Expect(err).To(gomega.MatchError(test.expectedError.Error())) + g.Expect(err).NotTo(gomega.HaveOccurred(), "should not return an error for valid network configuration") } }) } diff --git a/go-controller/pkg/util/nad.go b/go-controller/pkg/util/nad.go new file mode 100644 index 0000000000..3a220e2b82 --- /dev/null +++ b/go-controller/pkg/util/nad.go @@ -0,0 +1,46 @@ +package util + +import ( + "context" + "fmt" + + nadtypes "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + nadclientset "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/clientset/versioned" + nadlisters "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/listers/k8s.cni.cncf.io/v1" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" +) + +// EnsureDefaultNetworkNAD ensures that a well-known NAD exists for the +// default network in ovn-k namespace. This will allow the users to customize +// the primary UDN attachments with static IPs, and/or MAC address requests, by +// using the multus-cni `default network` feature. 
+func EnsureDefaultNetworkNAD(nadLister nadlisters.NetworkAttachmentDefinitionLister, nadClient nadclientset.Interface) (*nadtypes.NetworkAttachmentDefinition, error) { + nad, err := nadLister.NetworkAttachmentDefinitions(config.Kubernetes.OVNConfigNamespace).Get(types.DefaultNetworkName) + if err != nil && !apierrors.IsNotFound(err) { + return nil, err + } + if nad != nil { + return nad, nil + } + return nadClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(config.Kubernetes.OVNConfigNamespace).Create( + context.Background(), + &nadtypes.NetworkAttachmentDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: types.DefaultNetworkName, + Namespace: config.Kubernetes.OVNConfigNamespace, + }, + Spec: nadtypes.NetworkAttachmentDefinitionSpec{ + Config: fmt.Sprintf("{\"cniVersion\": \"0.4.0\", \"name\": \"ovn-kubernetes\", \"type\": \"%s\"}", config.CNI.Plugin), + }, + }, + // note we don't set ourselves as field manager for this create as we + // want to process the resulting event that would otherwise be filtered + // out in nadNeedsUpdate + metav1.CreateOptions{}, + ) +} diff --git a/go-controller/pkg/util/pod_annotation.go b/go-controller/pkg/util/pod_annotation.go index b5c46a804f..0dc9f6af8a 100644 --- a/go-controller/pkg/util/pod_annotation.go +++ b/go-controller/pkg/util/pod_annotation.go @@ -53,7 +53,7 @@ import ( const ( // OvnPodAnnotationName is the constant string representing the POD annotation key OvnPodAnnotationName = "k8s.ovn.org/pod-networks" - // DefNetworkAnnotation is the pod annotation for the cluster-wide default network + // DefNetworkAnnotation is the pod annotation for the cluster-wide active network DefNetworkAnnotation = "v1.multus-cni.io/default-network" // OvnUDNIPAMClaimName is used for workload owners to instruct OVN-K which // IPAMClaim will hold the allocation for the workload diff --git a/go-controller/pkg/util/util.go b/go-controller/pkg/util/util.go index cdcface465..4455de04c9 100644 --- a/go-controller/pkg/util/util.go +++ b/go-controller/pkg/util/util.go @@ -671,3 +671,11 @@ func GetMirroredEndpointSlices(controller, sourceName, namespace string, endpoin } return mirroredEndpointSlices, nil } + +func MustParseCIDR(cidr string) *net.IPNet { + _, ipNet, err := net.ParseCIDR(cidr) + if err != nil { + panic(fmt.Sprintf("failed to parse CIDR %q: %v", cidr, err)) + } + return ipNet +} diff --git a/go-controller/vendor/github.com/containernetworking/cni/libcni/api.go b/go-controller/vendor/github.com/containernetworking/cni/libcni/api.go index 0d82a2dd3c..201a12e977 100644 --- a/go-controller/vendor/github.com/containernetworking/cni/libcni/api.go +++ b/go-controller/vendor/github.com/containernetworking/cni/libcni/api.go @@ -15,7 +15,7 @@ package libcni // Note this is the actual implementation of the CNI specification, which -// is reflected in the https://github.com/containernetworking/cni/blob/master/SPEC.md file +// is reflected in the SPEC.md file. // it is typically bundled into runtime providers (i.e. containerd or cri-o would use this // before calling runc or hcsshim). It is also bundled into CNI providers as well, for example, // to add an IP to a container, to parse the configuration of the CNI and so on. 
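To illustrate the default-network flow added above (EnsureDefaultNetworkNAD together with overrideActiveNSEWithDefaultNSE), here is a small self-contained sketch of how IP, MAC and IPAM-claim requests placed on the v1.multus-cni.io/default-network pod annotation are carried over to the active layer2 primary UDN selection element; the local nse struct is only a stand-in for the real NetworkSelectionElement type, and the namespace/name values mirror those used in the unit tests above:

package main

import (
	"encoding/json"
	"fmt"
)

// nse is a stand-in for nettypes.NetworkSelectionElement, reduced to the fields
// that the patch copies from the default-network NSE onto the active one.
type nse struct {
	Namespace          string   `json:"namespace"`
	Name               string   `json:"name"`
	IPRequest          []string `json:"ips,omitempty"`
	MacRequest         string   `json:"mac,omitempty"`
	IPAMClaimReference string   `json:"ipam-claim-reference,omitempty"`
}

func main() {
	// Value of the v1.multus-cni.io/default-network annotation; it must
	// reference ovn-kubernetes/default for the override to be accepted.
	annotation := `[{"namespace": "ovn-kubernetes", "name": "default", "ips": ["192.168.0.3/24", "fda6::3/48"], "mac": "aa:bb:cc:dd:ee:ff", "ipam-claim-reference": "my-ipam-claim"}]`

	var defaults []nse
	if err := json.Unmarshal([]byte(annotation), &defaults); err != nil {
		panic(err)
	}

	// The active NSE initially only names the primary UDN NAD; the IP, MAC and
	// IPAM claim requests are then taken from the default NSE, as
	// overrideActiveNSEWithDefaultNSE does for layer2 primary UDNs.
	active := nse{Namespace: "ns1", Name: "attachment1"}
	active.IPRequest = defaults[0].IPRequest
	active.MacRequest = defaults[0].MacRequest
	active.IPAMClaimReference = defaults[0].IPAMClaimReference

	fmt.Printf("%+v\n", active)
}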
@@ -23,10 +23,11 @@ package libcni import ( "context" "encoding/json" + "errors" "fmt" - "io/ioutil" "os" "path/filepath" + "sort" "strings" "github.com/containernetworking/cni/pkg/invoke" @@ -38,6 +39,8 @@ import ( var ( CacheDir = "/var/lib/cni" + // slightly awkward wording to preserve anyone matching on error strings + ErrorCheckNotSupp = fmt.Errorf("does not support the CHECK command") ) const ( @@ -73,10 +76,25 @@ type NetworkConfigList struct { Name string CNIVersion string DisableCheck bool + DisableGC bool Plugins []*NetworkConfig Bytes []byte } +type NetworkAttachment struct { + ContainerID string + Network string + IfName string + Config []byte + NetNS string + CniArgs [][2]string + CapabilityArgs map[string]interface{} +} + +type GCArgs struct { + ValidAttachments []types.GCAttachment +} + type CNI interface { AddNetworkList(ctx context.Context, net *NetworkConfigList, rt *RuntimeConf) (types.Result, error) CheckNetworkList(ctx context.Context, net *NetworkConfigList, rt *RuntimeConf) error @@ -92,6 +110,13 @@ type CNI interface { ValidateNetworkList(ctx context.Context, net *NetworkConfigList) ([]string, error) ValidateNetwork(ctx context.Context, net *NetworkConfig) ([]string, error) + + GCNetworkList(ctx context.Context, net *NetworkConfigList, args *GCArgs) error + GetStatusNetworkList(ctx context.Context, net *NetworkConfigList) error + + GetCachedAttachments(containerID string) ([]*NetworkAttachment, error) + + GetVersionInfo(ctx context.Context, pluginType string) (version.PluginInfo, error) } type CNIConfig struct { @@ -139,8 +164,11 @@ func buildOneConfig(name, cniVersion string, orig *NetworkConfig, prevResult typ if err != nil { return nil, err } + if rt != nil { + return injectRuntimeConfig(orig, rt) + } - return injectRuntimeConfig(orig, rt) + return orig, nil } // This function takes a libcni RuntimeConf structure and injects values into @@ -195,6 +223,7 @@ type cachedInfo struct { Config []byte `json:"config"` IfName string `json:"ifName"` NetworkName string `json:"networkName"` + NetNS string `json:"netns,omitempty"` CniArgs [][2]string `json:"cniArgs,omitempty"` CapabilityArgs map[string]interface{} `json:"capabilityArgs,omitempty"` RawResult map[string]interface{} `json:"result,omitempty"` @@ -229,6 +258,7 @@ func (c *CNIConfig) cacheAdd(result types.Result, config []byte, netName string, Config: config, IfName: rt.IfName, NetworkName: netName, + NetNS: rt.NetNS, CniArgs: rt.Args, CapabilityArgs: rt.CapabilityArgs, } @@ -254,11 +284,11 @@ func (c *CNIConfig) cacheAdd(result types.Result, config []byte, netName string, if err != nil { return err } - if err := os.MkdirAll(filepath.Dir(fname), 0700); err != nil { + if err := os.MkdirAll(filepath.Dir(fname), 0o700); err != nil { return err } - return ioutil.WriteFile(fname, newBytes, 0600) + return os.WriteFile(fname, newBytes, 0o600) } func (c *CNIConfig) cacheDel(netName string, rt *RuntimeConf) error { @@ -277,7 +307,7 @@ func (c *CNIConfig) getCachedConfig(netName string, rt *RuntimeConf) ([]byte, *R if err != nil { return nil, nil, err } - bytes, err = ioutil.ReadFile(fname) + bytes, err = os.ReadFile(fname) if err != nil { // Ignore read errors; the cached result may not exist on-disk return nil, nil, nil @@ -305,7 +335,7 @@ func (c *CNIConfig) getLegacyCachedResult(netName, cniVersion string, rt *Runtim if err != nil { return nil, err } - data, err := ioutil.ReadFile(fname) + data, err := os.ReadFile(fname) if err != nil { // Ignore read errors; the cached result may not exist on-disk return nil, nil @@ 
-333,7 +363,7 @@ func (c *CNIConfig) getCachedResult(netName, cniVersion string, rt *RuntimeConf) if err != nil { return nil, err } - fdata, err := ioutil.ReadFile(fname) + fdata, err := os.ReadFile(fname) if err != nil { // Ignore read errors; the cached result may not exist on-disk return nil, nil @@ -390,6 +420,68 @@ func (c *CNIConfig) GetNetworkCachedConfig(net *NetworkConfig, rt *RuntimeConf) return c.getCachedConfig(net.Network.Name, rt) } +// GetCachedAttachments returns a list of network attachments from the cache. +// The returned list will be filtered by the containerID if the value is not empty. +func (c *CNIConfig) GetCachedAttachments(containerID string) ([]*NetworkAttachment, error) { + dirPath := filepath.Join(c.getCacheDir(&RuntimeConf{}), "results") + entries, err := os.ReadDir(dirPath) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, err + } + + fileNames := make([]string, 0, len(entries)) + for _, e := range entries { + fileNames = append(fileNames, e.Name()) + } + sort.Strings(fileNames) + + attachments := []*NetworkAttachment{} + for _, fname := range fileNames { + if len(containerID) > 0 { + part := fmt.Sprintf("-%s-", containerID) + pos := strings.Index(fname, part) + if pos <= 0 || pos+len(part) >= len(fname) { + continue + } + } + + cacheFile := filepath.Join(dirPath, fname) + bytes, err := os.ReadFile(cacheFile) + if err != nil { + continue + } + + cachedInfo := cachedInfo{} + + if err := json.Unmarshal(bytes, &cachedInfo); err != nil { + continue + } + if cachedInfo.Kind != CNICacheV1 { + continue + } + if len(containerID) > 0 && cachedInfo.ContainerID != containerID { + continue + } + if cachedInfo.IfName == "" || cachedInfo.NetworkName == "" { + continue + } + + attachments = append(attachments, &NetworkAttachment{ + ContainerID: cachedInfo.ContainerID, + Network: cachedInfo.NetworkName, + IfName: cachedInfo.IfName, + Config: cachedInfo.Config, + NetNS: cachedInfo.NetNS, + CniArgs: cachedInfo.CniArgs, + CapabilityArgs: cachedInfo.CapabilityArgs, + }) + } + return attachments, nil +} + func (c *CNIConfig) addNetwork(ctx context.Context, name, cniVersion string, net *NetworkConfig, prevResult types.Result, rt *RuntimeConf) (types.Result, error) { c.ensureExec() pluginPath, err := c.exec.FindInPath(net.Network.Type, c.Path) @@ -453,7 +545,7 @@ func (c *CNIConfig) CheckNetworkList(ctx context.Context, list *NetworkConfigLis if gtet, err := version.GreaterThanOrEqualTo(list.CNIVersion, "0.4.0"); err != nil { return err } else if !gtet { - return fmt.Errorf("configuration version %q does not support the CHECK command", list.CNIVersion) + return fmt.Errorf("configuration version %q %w", list.CNIVersion, ErrorCheckNotSupp) } if list.DisableCheck { @@ -497,9 +589,9 @@ func (c *CNIConfig) DelNetworkList(ctx context.Context, list *NetworkConfigList, if gtet, err := version.GreaterThanOrEqualTo(list.CNIVersion, "0.4.0"); err != nil { return err } else if gtet { - cachedResult, err = c.getCachedResult(list.Name, list.CNIVersion, rt) - if err != nil { - return fmt.Errorf("failed to get network %q cached result: %w", list.Name, err) + if cachedResult, err = c.getCachedResult(list.Name, list.CNIVersion, rt); err != nil { + _ = c.cacheDel(list.Name, rt) + cachedResult = nil } } @@ -509,6 +601,7 @@ func (c *CNIConfig) DelNetworkList(ctx context.Context, list *NetworkConfigList, return fmt.Errorf("plugin %s failed (delete): %w", pluginDescription(net.Network), err) } } + _ = c.cacheDel(list.Name, rt) return nil @@ -547,7 +640,7 @@ func (c 
*CNIConfig) CheckNetwork(ctx context.Context, net *NetworkConfig, rt *Ru if gtet, err := version.GreaterThanOrEqualTo(net.Network.CNIVersion, "0.4.0"); err != nil { return err } else if !gtet { - return fmt.Errorf("configuration version %q does not support the CHECK command", net.Network.CNIVersion) + return fmt.Errorf("configuration version %q %w", net.Network.CNIVersion, ErrorCheckNotSupp) } cachedResult, err := c.getCachedResult(net.Network.Name, net.Network.CNIVersion, rt) @@ -666,6 +759,129 @@ func (c *CNIConfig) GetVersionInfo(ctx context.Context, pluginType string) (vers return invoke.GetVersionInfo(ctx, pluginPath, c.exec) } +// GCNetworkList will do two things +// - dump the list of cached attachments, and issue deletes as necessary +// - issue a GC to the underlying plugins (if the version is high enough) +func (c *CNIConfig) GCNetworkList(ctx context.Context, list *NetworkConfigList, args *GCArgs) error { + // If DisableGC is set, then don't bother GCing at all. + if list.DisableGC { + return nil + } + + // First, get the list of cached attachments + cachedAttachments, err := c.GetCachedAttachments("") + if err != nil { + return nil + } + + var validAttachments map[types.GCAttachment]interface{} + if args != nil { + validAttachments = make(map[types.GCAttachment]interface{}, len(args.ValidAttachments)) + for _, a := range args.ValidAttachments { + validAttachments[a] = nil + } + } + + var errs []error + + for _, cachedAttachment := range cachedAttachments { + if cachedAttachment.Network != list.Name { + continue + } + // we found this attachment + gca := types.GCAttachment{ + ContainerID: cachedAttachment.ContainerID, + IfName: cachedAttachment.IfName, + } + if _, ok := validAttachments[gca]; ok { + continue + } + // otherwise, this attachment wasn't valid and we should issue a CNI DEL + rt := RuntimeConf{ + ContainerID: cachedAttachment.ContainerID, + NetNS: cachedAttachment.NetNS, + IfName: cachedAttachment.IfName, + Args: cachedAttachment.CniArgs, + CapabilityArgs: cachedAttachment.CapabilityArgs, + } + if err := c.DelNetworkList(ctx, list, &rt); err != nil { + errs = append(errs, fmt.Errorf("failed to delete stale attachment %s %s: %w", rt.ContainerID, rt.IfName, err)) + } + } + + // now, if the version supports it, issue a GC + if gt, _ := version.GreaterThanOrEqualTo(list.CNIVersion, "1.1.0"); gt { + inject := map[string]interface{}{ + "name": list.Name, + "cniVersion": list.CNIVersion, + } + if args != nil { + inject["cni.dev/valid-attachments"] = args.ValidAttachments + // #1101: spec used incorrect variable name + inject["cni.dev/attachments"] = args.ValidAttachments + } + + for _, plugin := range list.Plugins { + // build config here + pluginConfig, err := InjectConf(plugin, inject) + if err != nil { + errs = append(errs, fmt.Errorf("failed to generate configuration to GC plugin %s: %w", plugin.Network.Type, err)) + } + if err := c.gcNetwork(ctx, pluginConfig); err != nil { + errs = append(errs, fmt.Errorf("failed to GC plugin %s: %w", plugin.Network.Type, err)) + } + } + } + + return errors.Join(errs...) 
+} + +func (c *CNIConfig) gcNetwork(ctx context.Context, net *NetworkConfig) error { + c.ensureExec() + pluginPath, err := c.exec.FindInPath(net.Network.Type, c.Path) + if err != nil { + return err + } + args := c.args("GC", &RuntimeConf{}) + + return invoke.ExecPluginWithoutResult(ctx, pluginPath, net.Bytes, args, c.exec) +} + +func (c *CNIConfig) GetStatusNetworkList(ctx context.Context, list *NetworkConfigList) error { + // If the version doesn't support status, abort. + if gt, _ := version.GreaterThanOrEqualTo(list.CNIVersion, "1.1.0"); !gt { + return nil + } + + inject := map[string]interface{}{ + "name": list.Name, + "cniVersion": list.CNIVersion, + } + + for _, plugin := range list.Plugins { + // build config here + pluginConfig, err := InjectConf(plugin, inject) + if err != nil { + return fmt.Errorf("failed to generate configuration to get plugin STATUS %s: %w", plugin.Network.Type, err) + } + if err := c.getStatusNetwork(ctx, pluginConfig); err != nil { + return err // Don't collect errors here, so we return a clean error code. + } + } + return nil +} + +func (c *CNIConfig) getStatusNetwork(ctx context.Context, net *NetworkConfig) error { + c.ensureExec() + pluginPath, err := c.exec.FindInPath(net.Network.Type, c.Path) + if err != nil { + return err + } + args := c.args("STATUS", &RuntimeConf{}) + + return invoke.ExecPluginWithoutResult(ctx, pluginPath, net.Bytes, args, c.exec) +} + // ===== func (c *CNIConfig) args(action string, rt *RuntimeConf) *invoke.Args { return &invoke.Args{ diff --git a/go-controller/vendor/github.com/containernetworking/cni/libcni/conf.go b/go-controller/vendor/github.com/containernetworking/cni/libcni/conf.go index 3cd6a59d1c..1d1b821c63 100644 --- a/go-controller/vendor/github.com/containernetworking/cni/libcni/conf.go +++ b/go-controller/vendor/github.com/containernetworking/cni/libcni/conf.go @@ -16,13 +16,16 @@ package libcni import ( "encoding/json" + "errors" "fmt" - "io/ioutil" "os" "path/filepath" + "slices" "sort" + "strings" "github.com/containernetworking/cni/pkg/types" + "github.com/containernetworking/cni/pkg/version" ) type NotFoundError struct { @@ -54,7 +57,7 @@ func ConfFromBytes(bytes []byte) (*NetworkConfig, error) { } func ConfFromFile(filename string) (*NetworkConfig, error) { - bytes, err := ioutil.ReadFile(filename) + bytes, err := os.ReadFile(filename) if err != nil { return nil, fmt.Errorf("error reading %s: %w", filename, err) } @@ -85,17 +88,89 @@ func ConfListFromBytes(bytes []byte) (*NetworkConfigList, error) { } } - disableCheck := false - if rawDisableCheck, ok := rawList["disableCheck"]; ok { - disableCheck, ok = rawDisableCheck.(bool) + rawVersions, ok := rawList["cniVersions"] + if ok { + // Parse the current package CNI version + rvs, ok := rawVersions.([]interface{}) + if !ok { + return nil, fmt.Errorf("error parsing configuration list: invalid type for cniVersions: %T", rvs) + } + vs := make([]string, 0, len(rvs)) + for i, rv := range rvs { + v, ok := rv.(string) + if !ok { + return nil, fmt.Errorf("error parsing configuration list: invalid type for cniVersions index %d: %T", i, rv) + } + gt, err := version.GreaterThan(v, version.Current()) + if err != nil { + return nil, fmt.Errorf("error parsing configuration list: invalid cniVersions entry %s at index %d: %w", v, i, err) + } else if !gt { + // Skip versions "greater" than this implementation of the spec + vs = append(vs, v) + } + } + + // if cniVersion was already set, append it to the list for sorting. 
+ if cniVersion != "" { + gt, err := version.GreaterThan(cniVersion, version.Current()) + if err != nil { + return nil, fmt.Errorf("error parsing configuration list: invalid cniVersion %s: %w", cniVersion, err) + } else if !gt { + // ignore any versions higher than the current implemented spec version + vs = append(vs, cniVersion) + } + } + slices.SortFunc[[]string](vs, func(v1, v2 string) int { + if v1 == v2 { + return 0 + } + if gt, _ := version.GreaterThan(v1, v2); gt { + return 1 + } + return -1 + }) + if len(vs) > 0 { + cniVersion = vs[len(vs)-1] + } + } + + readBool := func(key string) (bool, error) { + rawVal, ok := rawList[key] if !ok { - return nil, fmt.Errorf("error parsing configuration list: invalid disableCheck type %T", rawDisableCheck) + return false, nil } + if b, ok := rawVal.(bool); ok { + return b, nil + } + + s, ok := rawVal.(string) + if !ok { + return false, fmt.Errorf("error parsing configuration list: invalid type %T for %s", rawVal, key) + } + s = strings.ToLower(s) + switch s { + case "false": + return false, nil + case "true": + return true, nil + } + return false, fmt.Errorf("error parsing configuration list: invalid value %q for %s", s, key) + } + + disableCheck, err := readBool("disableCheck") + if err != nil { + return nil, err + } + + disableGC, err := readBool("disableGC") + if err != nil { + return nil, err } list := &NetworkConfigList{ Name: name, DisableCheck: disableCheck, + DisableGC: disableGC, CNIVersion: cniVersion, Bytes: bytes, } @@ -129,7 +204,7 @@ func ConfListFromBytes(bytes []byte) (*NetworkConfigList, error) { } func ConfListFromFile(filename string) (*NetworkConfigList, error) { - bytes, err := ioutil.ReadFile(filename) + bytes, err := os.ReadFile(filename) if err != nil { return nil, fmt.Errorf("error reading %s: %w", filename, err) } @@ -138,7 +213,7 @@ func ConfListFromFile(filename string) (*NetworkConfigList, error) { func ConfFiles(dir string, extensions []string) ([]string, error) { // In part, adapted from rkt/networking/podenv.go#listFiles - files, err := ioutil.ReadDir(dir) + files, err := os.ReadDir(dir) switch { case err == nil: // break case os.IsNotExist(err): @@ -206,7 +281,8 @@ func LoadConfList(dir, name string) (*NetworkConfigList, error) { singleConf, err := LoadConf(dir, name) if err != nil { // A little extra logic so the error makes sense - if _, ok := err.(NoConfigsFoundError); len(files) != 0 && ok { + var ncfErr NoConfigsFoundError + if len(files) != 0 && errors.As(err, &ncfErr) { // Config lists found but no config files found return nil, NotFoundError{dir, name} } diff --git a/go-controller/vendor/github.com/containernetworking/cni/pkg/invoke/delegate.go b/go-controller/vendor/github.com/containernetworking/cni/pkg/invoke/delegate.go index 8defe4dd39..c8b548e7c6 100644 --- a/go-controller/vendor/github.com/containernetworking/cni/pkg/invoke/delegate.go +++ b/go-controller/vendor/github.com/containernetworking/cni/pkg/invoke/delegate.go @@ -51,25 +51,34 @@ func DelegateAdd(ctx context.Context, delegatePlugin string, netconf []byte, exe // DelegateCheck calls the given delegate plugin with the CNI CHECK action and // JSON configuration func DelegateCheck(ctx context.Context, delegatePlugin string, netconf []byte, exec Exec) error { + return delegateNoResult(ctx, delegatePlugin, netconf, exec, "CHECK") +} + +func delegateNoResult(ctx context.Context, delegatePlugin string, netconf []byte, exec Exec, verb string) error { pluginPath, realExec, err := delegateCommon(delegatePlugin, exec) if err != nil { return err } - // 
DelegateCheck will override the original CNI_COMMAND env from process with CHECK - return ExecPluginWithoutResult(ctx, pluginPath, netconf, delegateArgs("CHECK"), realExec) + return ExecPluginWithoutResult(ctx, pluginPath, netconf, delegateArgs(verb), realExec) } // DelegateDel calls the given delegate plugin with the CNI DEL action and // JSON configuration func DelegateDel(ctx context.Context, delegatePlugin string, netconf []byte, exec Exec) error { - pluginPath, realExec, err := delegateCommon(delegatePlugin, exec) - if err != nil { - return err - } + return delegateNoResult(ctx, delegatePlugin, netconf, exec, "DEL") +} - // DelegateDel will override the original CNI_COMMAND env from process with DEL - return ExecPluginWithoutResult(ctx, pluginPath, netconf, delegateArgs("DEL"), realExec) +// DelegateStatus calls the given delegate plugin with the CNI STATUS action and +// JSON configuration +func DelegateStatus(ctx context.Context, delegatePlugin string, netconf []byte, exec Exec) error { + return delegateNoResult(ctx, delegatePlugin, netconf, exec, "STATUS") +} + +// DelegateGC calls the given delegate plugin with the CNI GC action and +// JSON configuration +func DelegateGC(ctx context.Context, delegatePlugin string, netconf []byte, exec Exec) error { + return delegateNoResult(ctx, delegatePlugin, netconf, exec, "GC") } // return CNIArgs used by delegation diff --git a/go-controller/vendor/github.com/containernetworking/cni/pkg/invoke/exec.go b/go-controller/vendor/github.com/containernetworking/cni/pkg/invoke/exec.go index 3ad07aa8f2..a5e015fc92 100644 --- a/go-controller/vendor/github.com/containernetworking/cni/pkg/invoke/exec.go +++ b/go-controller/vendor/github.com/containernetworking/cni/pkg/invoke/exec.go @@ -81,17 +81,17 @@ func fixupResultVersion(netconf, result []byte) (string, []byte, error) { // object to ExecPluginWithResult() to verify the incoming stdin and environment // and provide a tailored response: // -//import ( +// import ( // "encoding/json" // "path" // "strings" -//) +// ) // -//type fakeExec struct { +// type fakeExec struct { // version.PluginDecoder -//} +// } // -//func (f *fakeExec) ExecPlugin(pluginPath string, stdinData []byte, environ []string) ([]byte, error) { +// func (f *fakeExec) ExecPlugin(pluginPath string, stdinData []byte, environ []string) ([]byte, error) { // net := &types.NetConf{} // err := json.Unmarshal(stdinData, net) // if err != nil { @@ -109,14 +109,14 @@ func fixupResultVersion(netconf, result []byte) (string, []byte, error) { // } // } // return []byte("{\"CNIVersion\":\"0.4.0\"}"), nil -//} +// } // -//func (f *fakeExec) FindInPath(plugin string, paths []string) (string, error) { +// func (f *fakeExec) FindInPath(plugin string, paths []string) (string, error) { // if len(paths) > 0 { // return path.Join(paths[0], plugin), nil // } // return "", fmt.Errorf("failed to find plugin %s in paths %v", plugin, paths) -//} +// } func ExecPluginWithResult(ctx context.Context, pluginPath string, netconf []byte, args CNIArgs, exec Exec) (types.Result, error) { if exec == nil { diff --git a/go-controller/vendor/github.com/containernetworking/cni/pkg/invoke/os_unix.go b/go-controller/vendor/github.com/containernetworking/cni/pkg/invoke/os_unix.go index 9bcfb45536..ed0999bd0e 100644 --- a/go-controller/vendor/github.com/containernetworking/cni/pkg/invoke/os_unix.go +++ b/go-controller/vendor/github.com/containernetworking/cni/pkg/invoke/os_unix.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // 
limitations under the License. +//go:build darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris // +build darwin dragonfly freebsd linux netbsd openbsd solaris package invoke diff --git a/go-controller/vendor/github.com/containernetworking/cni/pkg/ns/ns_darwin.go b/go-controller/vendor/github.com/containernetworking/cni/pkg/ns/ns_darwin.go new file mode 100644 index 0000000000..cffe136178 --- /dev/null +++ b/go-controller/vendor/github.com/containernetworking/cni/pkg/ns/ns_darwin.go @@ -0,0 +1,21 @@ +// Copyright 2022 CNI authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ns + +import "github.com/containernetworking/cni/pkg/types" + +func CheckNetNS(nsPath string) (bool, *types.Error) { + return false, nil +} diff --git a/go-controller/vendor/github.com/containernetworking/cni/pkg/ns/ns_linux.go b/go-controller/vendor/github.com/containernetworking/cni/pkg/ns/ns_linux.go new file mode 100644 index 0000000000..3d58e75d6c --- /dev/null +++ b/go-controller/vendor/github.com/containernetworking/cni/pkg/ns/ns_linux.go @@ -0,0 +1,50 @@ +// Copyright 2022 CNI authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ns + +import ( + "runtime" + + "github.com/vishvananda/netns" + + "github.com/containernetworking/cni/pkg/types" +) + +// Returns an object representing the current OS thread's network namespace +func getCurrentNS() (netns.NsHandle, error) { + // Lock the thread in case other goroutine executes in it and changes its + // network namespace after getCurrentThreadNetNSPath(), otherwise it might + // return an unexpected network namespace. + runtime.LockOSThread() + defer runtime.UnlockOSThread() + return netns.Get() +} + +func CheckNetNS(nsPath string) (bool, *types.Error) { + ns, err := netns.GetFromPath(nsPath) + // Let plugins check whether nsPath from args is valid. Also support CNI DEL for empty nsPath as already-deleted nsPath. 
+ if err != nil { + return false, nil + } + defer ns.Close() + + pluginNS, err := getCurrentNS() + if err != nil { + return false, types.NewError(types.ErrInvalidNetNS, "get plugin's netns failed", "") + } + defer pluginNS.Close() + + return pluginNS.Equal(ns), nil +} diff --git a/go-controller/vendor/github.com/containernetworking/cni/pkg/ns/ns_windows.go b/go-controller/vendor/github.com/containernetworking/cni/pkg/ns/ns_windows.go new file mode 100644 index 0000000000..cffe136178 --- /dev/null +++ b/go-controller/vendor/github.com/containernetworking/cni/pkg/ns/ns_windows.go @@ -0,0 +1,21 @@ +// Copyright 2022 CNI authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ns + +import "github.com/containernetworking/cni/pkg/types" + +func CheckNetNS(nsPath string) (bool, *types.Error) { + return false, nil +} diff --git a/go-controller/vendor/github.com/containernetworking/cni/pkg/skel/skel.go b/go-controller/vendor/github.com/containernetworking/cni/pkg/skel/skel.go index cb8781972d..f29cf34594 100644 --- a/go-controller/vendor/github.com/containernetworking/cni/pkg/skel/skel.go +++ b/go-controller/vendor/github.com/containernetworking/cni/pkg/skel/skel.go @@ -19,13 +19,14 @@ package skel import ( "bytes" "encoding/json" + "errors" "fmt" "io" - "io/ioutil" "log" "os" "strings" + "github.com/containernetworking/cni/pkg/ns" "github.com/containernetworking/cni/pkg/types" "github.com/containernetworking/cni/pkg/utils" "github.com/containernetworking/cni/pkg/version" @@ -34,12 +35,13 @@ import ( // CmdArgs captures all the arguments passed in to the plugin // via both env vars and stdin type CmdArgs struct { - ContainerID string - Netns string - IfName string - Args string - Path string - StdinData []byte + ContainerID string + Netns string + IfName string + Args string + Path string + NetnsOverride string + StdinData []byte } type dispatcher struct { @@ -55,21 +57,25 @@ type dispatcher struct { type reqForCmdEntry map[string]bool func (t *dispatcher) getCmdArgsFromEnv() (string, *CmdArgs, *types.Error) { - var cmd, contID, netns, ifName, args, path string + var cmd, contID, netns, ifName, args, path, netnsOverride string vars := []struct { - name string - val *string - reqForCmd reqForCmdEntry + name string + val *string + reqForCmd reqForCmdEntry + validateFn func(string) *types.Error }{ { "CNI_COMMAND", &cmd, reqForCmdEntry{ - "ADD": true, - "CHECK": true, - "DEL": true, + "ADD": true, + "CHECK": true, + "DEL": true, + "GC": true, + "STATUS": true, }, + nil, }, { "CNI_CONTAINERID", @@ -79,6 +85,7 @@ func (t *dispatcher) getCmdArgsFromEnv() (string, *CmdArgs, *types.Error) { "CHECK": true, "DEL": true, }, + utils.ValidateContainerID, }, { "CNI_NETNS", @@ -88,6 +95,7 @@ func (t *dispatcher) getCmdArgsFromEnv() (string, *CmdArgs, *types.Error) { "CHECK": true, "DEL": false, }, + nil, }, { "CNI_IFNAME", @@ -97,6 +105,7 @@ func (t *dispatcher) getCmdArgsFromEnv() (string, *CmdArgs, *types.Error) { "CHECK": true, "DEL": true, }, + utils.ValidateInterfaceName, }, { "CNI_ARGS", 
@@ -106,15 +115,29 @@ func (t *dispatcher) getCmdArgsFromEnv() (string, *CmdArgs, *types.Error) { "CHECK": false, "DEL": false, }, + nil, }, { "CNI_PATH", &path, reqForCmdEntry{ - "ADD": true, - "CHECK": true, - "DEL": true, + "ADD": true, + "CHECK": true, + "DEL": true, + "GC": true, + "STATUS": true, + }, + nil, + }, + { + "CNI_NETNS_OVERRIDE", + &netnsOverride, + reqForCmdEntry{ + "ADD": false, + "CHECK": false, + "DEL": false, }, + nil, }, } @@ -125,6 +148,10 @@ func (t *dispatcher) getCmdArgsFromEnv() (string, *CmdArgs, *types.Error) { if v.reqForCmd[cmd] || v.name == "CNI_COMMAND" { argsMissing = append(argsMissing, v.name) } + } else if v.reqForCmd[cmd] && v.validateFn != nil { + if err := v.validateFn(*v.val); err != nil { + return "", nil, err + } } } @@ -137,18 +164,25 @@ func (t *dispatcher) getCmdArgsFromEnv() (string, *CmdArgs, *types.Error) { t.Stdin = bytes.NewReader(nil) } - stdinData, err := ioutil.ReadAll(t.Stdin) + stdinData, err := io.ReadAll(t.Stdin) if err != nil { return "", nil, types.NewError(types.ErrIOFailure, fmt.Sprintf("error reading from stdin: %v", err), "") } + if cmd != "VERSION" { + if err := validateConfig(stdinData); err != nil { + return "", nil, err + } + } + cmdArgs := &CmdArgs{ - ContainerID: contID, - Netns: netns, - IfName: ifName, - Args: args, - Path: path, - StdinData: stdinData, + ContainerID: contID, + Netns: netns, + IfName: ifName, + Args: args, + Path: path, + StdinData: stdinData, + NetnsOverride: netnsOverride, } return cmd, cmdArgs, nil } @@ -163,8 +197,13 @@ func (t *dispatcher) checkVersionAndCall(cmdArgs *CmdArgs, pluginVersionInfo ver return types.NewError(types.ErrIncompatibleCNIVersion, "incompatible CNI versions", verErr.Details()) } + if toCall == nil { + return nil + } + if err = toCall(cmdArgs); err != nil { - if e, ok := err.(*types.Error); ok { + var e *types.Error + if errors.As(err, &e) { // don't wrap Error in Error return e } @@ -190,7 +229,7 @@ func validateConfig(jsonBytes []byte) *types.Error { return nil } -func (t *dispatcher) pluginMain(cmdAdd, cmdCheck, cmdDel func(_ *CmdArgs) error, versionInfo version.PluginInfo, about string) *types.Error { +func (t *dispatcher) pluginMain(funcs CNIFuncs, versionInfo version.PluginInfo, about string) *types.Error { cmd, cmdArgs, err := t.getCmdArgsFromEnv() if err != nil { // Print the about string to stderr when no command is set @@ -202,21 +241,20 @@ func (t *dispatcher) pluginMain(cmdAdd, cmdCheck, cmdDel func(_ *CmdArgs) error, return err } - if cmd != "VERSION" { - if err = validateConfig(cmdArgs.StdinData); err != nil { - return err - } - if err = utils.ValidateContainerID(cmdArgs.ContainerID); err != nil { + switch cmd { + case "ADD": + err = t.checkVersionAndCall(cmdArgs, versionInfo, funcs.Add) + if err != nil { return err } - if err = utils.ValidateInterfaceName(cmdArgs.IfName); err != nil { - return err + if strings.ToUpper(cmdArgs.NetnsOverride) != "TRUE" && cmdArgs.NetnsOverride != "1" { + isPluginNetNS, checkErr := ns.CheckNetNS(cmdArgs.Netns) + if checkErr != nil { + return checkErr + } else if isPluginNetNS { + return types.NewError(types.ErrInvalidNetNS, "plugin's netns and netns from CNI_NETNS should not be the same", "") + } } - } - - switch cmd { - case "ADD": - err = t.checkVersionAndCall(cmdArgs, versionInfo, cmdAdd) case "CHECK": configVersion, err := t.ConfVersionDecoder.Decode(cmdArgs.StdinData) if err != nil { @@ -232,7 +270,7 @@ func (t *dispatcher) pluginMain(cmdAdd, cmdCheck, cmdDel func(_ *CmdArgs) error, if err != nil { return 
types.NewError(types.ErrDecodingFailure, err.Error(), "") } else if gtet { - if err := t.checkVersionAndCall(cmdArgs, versionInfo, cmdCheck); err != nil { + if err := t.checkVersionAndCall(cmdArgs, versionInfo, funcs.Check); err != nil { return err } return nil @@ -240,7 +278,62 @@ func (t *dispatcher) pluginMain(cmdAdd, cmdCheck, cmdDel func(_ *CmdArgs) error, } return types.NewError(types.ErrIncompatibleCNIVersion, "plugin version does not allow CHECK", "") case "DEL": - err = t.checkVersionAndCall(cmdArgs, versionInfo, cmdDel) + err = t.checkVersionAndCall(cmdArgs, versionInfo, funcs.Del) + if err != nil { + return err + } + if strings.ToUpper(cmdArgs.NetnsOverride) != "TRUE" && cmdArgs.NetnsOverride != "1" { + isPluginNetNS, checkErr := ns.CheckNetNS(cmdArgs.Netns) + if checkErr != nil { + return checkErr + } else if isPluginNetNS { + return types.NewError(types.ErrInvalidNetNS, "plugin's netns and netns from CNI_NETNS should not be the same", "") + } + } + case "GC": + configVersion, err := t.ConfVersionDecoder.Decode(cmdArgs.StdinData) + if err != nil { + return types.NewError(types.ErrDecodingFailure, err.Error(), "") + } + if gtet, err := version.GreaterThanOrEqualTo(configVersion, "1.1.0"); err != nil { + return types.NewError(types.ErrDecodingFailure, err.Error(), "") + } else if !gtet { + return types.NewError(types.ErrIncompatibleCNIVersion, "config version does not allow GC", "") + } + for _, pluginVersion := range versionInfo.SupportedVersions() { + gtet, err := version.GreaterThanOrEqualTo(pluginVersion, configVersion) + if err != nil { + return types.NewError(types.ErrDecodingFailure, err.Error(), "") + } else if gtet { + if err := t.checkVersionAndCall(cmdArgs, versionInfo, funcs.GC); err != nil { + return err + } + return nil + } + } + return types.NewError(types.ErrIncompatibleCNIVersion, "plugin version does not allow GC", "") + case "STATUS": + configVersion, err := t.ConfVersionDecoder.Decode(cmdArgs.StdinData) + if err != nil { + return types.NewError(types.ErrDecodingFailure, err.Error(), "") + } + if gtet, err := version.GreaterThanOrEqualTo(configVersion, "1.1.0"); err != nil { + return types.NewError(types.ErrDecodingFailure, err.Error(), "") + } else if !gtet { + return types.NewError(types.ErrIncompatibleCNIVersion, "config version does not allow STATUS", "") + } + for _, pluginVersion := range versionInfo.SupportedVersions() { + gtet, err := version.GreaterThanOrEqualTo(pluginVersion, configVersion) + if err != nil { + return types.NewError(types.ErrDecodingFailure, err.Error(), "") + } else if gtet { + if err := t.checkVersionAndCall(cmdArgs, versionInfo, funcs.Status); err != nil { + return err + } + return nil + } + } + return types.NewError(types.ErrIncompatibleCNIVersion, "plugin version does not allow STATUS", "") case "VERSION": if err := versionInfo.Encode(t.Stdout); err != nil { return types.NewError(types.ErrIOFailure, err.Error(), "") @@ -264,13 +357,63 @@ func (t *dispatcher) pluginMain(cmdAdd, cmdCheck, cmdDel func(_ *CmdArgs) error, // // To let this package automatically handle errors and call os.Exit(1) for you, // use PluginMain() instead. +// +// Deprecated: Use github.com/containernetworking/cni/pkg/skel.PluginMainFuncsWithError instead. 
func PluginMainWithError(cmdAdd, cmdCheck, cmdDel func(_ *CmdArgs) error, versionInfo version.PluginInfo, about string) *types.Error { + return PluginMainFuncsWithError(CNIFuncs{Add: cmdAdd, Check: cmdCheck, Del: cmdDel}, versionInfo, about) +} + +// CNIFuncs contains a group of callback command funcs to be passed in as +// parameters to the core "main" for a plugin. +type CNIFuncs struct { + Add func(_ *CmdArgs) error + Del func(_ *CmdArgs) error + Check func(_ *CmdArgs) error + GC func(_ *CmdArgs) error + Status func(_ *CmdArgs) error +} + +// PluginMainFuncsWithError is the core "main" for a plugin. It accepts +// callback functions defined within CNIFuncs and returns an error. +// +// The caller must also specify what CNI spec versions the plugin supports. +// +// It is the responsibility of the caller to check for non-nil error return. +// +// For a plugin to comply with the CNI spec, it must print any error to stdout +// as JSON and then exit with nonzero status code. +// +// To let this package automatically handle errors and call os.Exit(1) for you, +// use PluginMainFuncs() instead. +func PluginMainFuncsWithError(funcs CNIFuncs, versionInfo version.PluginInfo, about string) *types.Error { return (&dispatcher{ Getenv: os.Getenv, Stdin: os.Stdin, Stdout: os.Stdout, Stderr: os.Stderr, - }).pluginMain(cmdAdd, cmdCheck, cmdDel, versionInfo, about) + }).pluginMain(funcs, versionInfo, about) +} + +// PluginMainFuncs is the core "main" for a plugin which includes automatic error handling. +// This is a newer alternative func to PluginMain which abstracts CNI commands within a +// CNIFuncs interface. +// +// The caller must also specify what CNI spec versions the plugin supports. +// +// The caller can specify an "about" string, which is printed on stderr +// when no CNI_COMMAND is specified. The recommended output is "CNI plugin v" +// +// When an error occurs in any func in CNIFuncs, PluginMainFuncs will print the error +// as JSON to stdout and call os.Exit(1). +// +// To have more control over error handling, use PluginMainFuncsWithError() instead. +func PluginMainFuncs(funcs CNIFuncs, versionInfo version.PluginInfo, about string) { + if e := PluginMainFuncsWithError(funcs, versionInfo, about); e != nil { + if err := e.Print(); err != nil { + log.Print("Error writing error JSON to stdout: ", err) + } + os.Exit(1) + } } // PluginMain is the core "main" for a plugin which includes automatic error handling. @@ -284,6 +427,8 @@ func PluginMainWithError(cmdAdd, cmdCheck, cmdDel func(_ *CmdArgs) error, versio // as JSON to stdout and call os.Exit(1). // // To have more control over error handling, use PluginMainWithError() instead. +// +// Deprecated: Use github.com/containernetworking/cni/pkg/skel.PluginMainFuncs instead. 
func PluginMain(cmdAdd, cmdCheck, cmdDel func(_ *CmdArgs) error, versionInfo version.PluginInfo, about string) { if e := PluginMainWithError(cmdAdd, cmdCheck, cmdDel, versionInfo, about); e != nil { if err := e.Print(); err != nil { diff --git a/go-controller/vendor/github.com/containernetworking/cni/pkg/types/100/types.go b/go-controller/vendor/github.com/containernetworking/cni/pkg/types/100/types.go index 0e1e8b857b..f58b91206d 100644 --- a/go-controller/vendor/github.com/containernetworking/cni/pkg/types/100/types.go +++ b/go-controller/vendor/github.com/containernetworking/cni/pkg/types/100/types.go @@ -26,9 +26,10 @@ import ( convert "github.com/containernetworking/cni/pkg/types/internal" ) -const ImplementedSpecVersion string = "1.0.0" +// The types did not change between v1.0 and v1.1 +const ImplementedSpecVersion string = "1.1.0" -var supportedVersions = []string{ImplementedSpecVersion} +var supportedVersions = []string{"1.0.0", "1.1.0"} // Register converters for all versions less than the implemented spec version func init() { @@ -38,10 +39,14 @@ func init() { convert.RegisterConverter("0.3.0", supportedVersions, convertFrom04x) convert.RegisterConverter("0.3.1", supportedVersions, convertFrom04x) convert.RegisterConverter("0.4.0", supportedVersions, convertFrom04x) + convert.RegisterConverter("1.0.0", []string{"1.1.0"}, convertFrom100) // Down-converters convert.RegisterConverter("1.0.0", []string{"0.3.0", "0.3.1", "0.4.0"}, convertTo04x) convert.RegisterConverter("1.0.0", []string{"0.1.0", "0.2.0"}, convertTo02x) + convert.RegisterConverter("1.1.0", []string{"0.3.0", "0.3.1", "0.4.0"}, convertTo04x) + convert.RegisterConverter("1.1.0", []string{"0.1.0", "0.2.0"}, convertTo02x) + convert.RegisterConverter("1.1.0", []string{"1.0.0"}, convertFrom100) // Creator convert.RegisterCreator(supportedVersions, NewResult) @@ -90,12 +95,49 @@ type Result struct { DNS types.DNS `json:"dns,omitempty"` } +// Note: DNS should be omit if DNS is empty but default Marshal function +// will output empty structure hence need to write a Marshal function +func (r *Result) MarshalJSON() ([]byte, error) { + // use type alias to escape recursion for json.Marshal() to MarshalJSON() + type fixObjType = Result + + bytes, err := json.Marshal(fixObjType(*r)) //nolint:all + if err != nil { + return nil, err + } + + fixupObj := make(map[string]interface{}) + if err := json.Unmarshal(bytes, &fixupObj); err != nil { + return nil, err + } + + if r.DNS.IsEmpty() { + delete(fixupObj, "dns") + } + + return json.Marshal(fixupObj) +} + +// convertFrom100 does nothing except set the version; the types are the same +func convertFrom100(from types.Result, toVersion string) (types.Result, error) { + fromResult := from.(*Result) + + result := &Result{ + CNIVersion: toVersion, + Interfaces: fromResult.Interfaces, + IPs: fromResult.IPs, + Routes: fromResult.Routes, + DNS: fromResult.DNS, + } + return result, nil +} + func convertFrom02x(from types.Result, toVersion string) (types.Result, error) { result040, err := convert.Convert(from, "0.4.0") if err != nil { return nil, err } - result100, err := convertFrom04x(result040, ImplementedSpecVersion) + result100, err := convertFrom04x(result040, toVersion) if err != nil { return nil, err } @@ -226,9 +268,12 @@ func (r *Result) PrintTo(writer io.Writer) error { // Interface contains values about the created interfaces type Interface struct { - Name string `json:"name"` - Mac string `json:"mac,omitempty"` - Sandbox string `json:"sandbox,omitempty"` + Name string `json:"name"` + 
Mac string `json:"mac,omitempty"` + Mtu int `json:"mtu,omitempty"` + Sandbox string `json:"sandbox,omitempty"` + SocketPath string `json:"socketPath,omitempty"` + PciID string `json:"pciID,omitempty"` } func (i *Interface) String() string { diff --git a/go-controller/vendor/github.com/containernetworking/cni/pkg/types/args.go b/go-controller/vendor/github.com/containernetworking/cni/pkg/types/args.go index 7516f03ef5..68a602bfdb 100644 --- a/go-controller/vendor/github.com/containernetworking/cni/pkg/types/args.go +++ b/go-controller/vendor/github.com/containernetworking/cni/pkg/types/args.go @@ -26,8 +26,8 @@ import ( type UnmarshallableBool bool // UnmarshalText implements the encoding.TextUnmarshaler interface. -// Returns boolean true if the string is "1" or "[Tt]rue" -// Returns boolean false if the string is "0" or "[Ff]alse" +// Returns boolean true if the string is "1" or "true" or "True" +// Returns boolean false if the string is "0" or "false" or "False” func (b *UnmarshallableBool) UnmarshalText(data []byte) error { s := strings.ToLower(string(data)) switch s { diff --git a/go-controller/vendor/github.com/containernetworking/cni/pkg/types/create/create.go b/go-controller/vendor/github.com/containernetworking/cni/pkg/types/create/create.go index ed28b33e8e..452cb62201 100644 --- a/go-controller/vendor/github.com/containernetworking/cni/pkg/types/create/create.go +++ b/go-controller/vendor/github.com/containernetworking/cni/pkg/types/create/create.go @@ -19,6 +19,9 @@ import ( "fmt" "github.com/containernetworking/cni/pkg/types" + _ "github.com/containernetworking/cni/pkg/types/020" + _ "github.com/containernetworking/cni/pkg/types/040" + _ "github.com/containernetworking/cni/pkg/types/100" convert "github.com/containernetworking/cni/pkg/types/internal" ) diff --git a/go-controller/vendor/github.com/containernetworking/cni/pkg/types/types.go b/go-controller/vendor/github.com/containernetworking/cni/pkg/types/types.go index fba17dfc0f..8453bb5d87 100644 --- a/go-controller/vendor/github.com/containernetworking/cni/pkg/types/types.go +++ b/go-controller/vendor/github.com/containernetworking/cni/pkg/types/types.go @@ -56,30 +56,73 @@ func (n *IPNet) UnmarshalJSON(data []byte) error { return nil } -// NetConf describes a network. -type NetConf struct { +// NetConfType describes a network. +type NetConfType struct { CNIVersion string `json:"cniVersion,omitempty"` Name string `json:"name,omitempty"` Type string `json:"type,omitempty"` Capabilities map[string]bool `json:"capabilities,omitempty"` IPAM IPAM `json:"ipam,omitempty"` - DNS DNS `json:"dns"` + DNS DNS `json:"dns,omitempty"` RawPrevResult map[string]interface{} `json:"prevResult,omitempty"` PrevResult Result `json:"-"` + + // ValidAttachments is only supplied when executing a GC operation + ValidAttachments []GCAttachment `json:"cni.dev/valid-attachments,omitempty"` +} + +// NetConf is defined as different type as custom MarshalJSON() and issue #1096 +type NetConf NetConfType + +// GCAttachment is the parameters to a GC call -- namely, +// the container ID and ifname pair that represents a +// still-valid attachment. 
+type GCAttachment struct { + ContainerID string `json:"containerID"` + IfName string `json:"ifname"` +} + +// Note: DNS should be omit if DNS is empty but default Marshal function +// will output empty structure hence need to write a Marshal function +func (n *NetConfType) MarshalJSON() ([]byte, error) { + // use type alias to escape recursion for json.Marshal() to MarshalJSON() + type fixObjType = NetConf + + bytes, err := json.Marshal(fixObjType(*n)) + if err != nil { + return nil, err + } + + fixupObj := make(map[string]interface{}) + if err := json.Unmarshal(bytes, &fixupObj); err != nil { + return nil, err + } + + if n.DNS.IsEmpty() { + delete(fixupObj, "dns") + } + + return json.Marshal(fixupObj) } type IPAM struct { Type string `json:"type,omitempty"` } +// IsEmpty returns true if IPAM structure has no value, otherwise return false +func (i *IPAM) IsEmpty() bool { + return i.Type == "" +} + // NetConfList describes an ordered list of networks. type NetConfList struct { CNIVersion string `json:"cniVersion,omitempty"` Name string `json:"name,omitempty"` DisableCheck bool `json:"disableCheck,omitempty"` + DisableGC bool `json:"disableGC,omitempty"` Plugins []*NetConf `json:"plugins,omitempty"` } @@ -116,31 +159,48 @@ type DNS struct { Options []string `json:"options,omitempty"` } +// IsEmpty returns true if DNS structure has no value, otherwise return false +func (d *DNS) IsEmpty() bool { + if len(d.Nameservers) == 0 && d.Domain == "" && len(d.Search) == 0 && len(d.Options) == 0 { + return true + } + return false +} + func (d *DNS) Copy() *DNS { if d == nil { return nil } to := &DNS{Domain: d.Domain} - for _, ns := range d.Nameservers { - to.Nameservers = append(to.Nameservers, ns) - } - for _, s := range d.Search { - to.Search = append(to.Search, s) - } - for _, o := range d.Options { - to.Options = append(to.Options, o) - } + to.Nameservers = append(to.Nameservers, d.Nameservers...) + to.Search = append(to.Search, d.Search...) + to.Options = append(to.Options, d.Options...) 
return to } type Route struct { - Dst net.IPNet - GW net.IP + Dst net.IPNet + GW net.IP + MTU int + AdvMSS int + Priority int + Table *int + Scope *int } func (r *Route) String() string { - return fmt.Sprintf("%+v", *r) + table := "" + if r.Table != nil { + table = fmt.Sprintf("%d", *r.Table) + } + + scope := "" + if r.Scope != nil { + scope = fmt.Sprintf("%d", *r.Scope) + } + + return fmt.Sprintf("{Dst:%+v GW:%v MTU:%d AdvMSS:%d Priority:%d Table:%s Scope:%s}", r.Dst, r.GW, r.MTU, r.AdvMSS, r.Priority, table, scope) } func (r *Route) Copy() *Route { @@ -148,14 +208,30 @@ func (r *Route) Copy() *Route { return nil } - return &Route{ - Dst: r.Dst, - GW: r.GW, + route := &Route{ + Dst: r.Dst, + GW: r.GW, + MTU: r.MTU, + AdvMSS: r.AdvMSS, + Priority: r.Priority, + Scope: r.Scope, + } + + if r.Table != nil { + table := *r.Table + route.Table = &table } + + if r.Scope != nil { + scope := *r.Scope + route.Scope = &scope + } + + return route } // Well known error codes -// see https://github.com/containernetworking/cni/blob/master/SPEC.md#well-known-error-codes +// see https://github.com/containernetworking/cni/blob/main/SPEC.md#well-known-error-codes const ( ErrUnknown uint = iota // 0 ErrIncompatibleCNIVersion // 1 @@ -165,6 +241,7 @@ const ( ErrIOFailure // 5 ErrDecodingFailure // 6 ErrInvalidNetworkConfig // 7 + ErrInvalidNetNS // 8 ErrTryAgainLater uint = 11 ErrInternal uint = 999 ) @@ -200,8 +277,13 @@ func (e *Error) Print() error { // JSON (un)marshallable types type route struct { - Dst IPNet `json:"dst"` - GW net.IP `json:"gw,omitempty"` + Dst IPNet `json:"dst"` + GW net.IP `json:"gw,omitempty"` + MTU int `json:"mtu,omitempty"` + AdvMSS int `json:"advmss,omitempty"` + Priority int `json:"priority,omitempty"` + Table *int `json:"table,omitempty"` + Scope *int `json:"scope,omitempty"` } func (r *Route) UnmarshalJSON(data []byte) error { @@ -212,13 +294,24 @@ func (r *Route) UnmarshalJSON(data []byte) error { r.Dst = net.IPNet(rt.Dst) r.GW = rt.GW + r.MTU = rt.MTU + r.AdvMSS = rt.AdvMSS + r.Priority = rt.Priority + r.Table = rt.Table + r.Scope = rt.Scope + return nil } func (r Route) MarshalJSON() ([]byte, error) { rt := route{ - Dst: IPNet(r.Dst), - GW: r.GW, + Dst: IPNet(r.Dst), + GW: r.GW, + MTU: r.MTU, + AdvMSS: r.AdvMSS, + Priority: r.Priority, + Table: r.Table, + Scope: r.Scope, } return json.Marshal(rt) diff --git a/go-controller/vendor/github.com/containernetworking/cni/pkg/utils/utils.go b/go-controller/vendor/github.com/containernetworking/cni/pkg/utils/utils.go index b8ec388745..1981d25569 100644 --- a/go-controller/vendor/github.com/containernetworking/cni/pkg/utils/utils.go +++ b/go-controller/vendor/github.com/containernetworking/cni/pkg/utils/utils.go @@ -36,7 +36,6 @@ var cniReg = regexp.MustCompile(`^` + cniValidNameChars + `*$`) // ValidateContainerID will validate that the supplied containerID is not empty does not contain invalid characters func ValidateContainerID(containerID string) *types.Error { - if containerID == "" { return types.NewError(types.ErrUnknownContainer, "missing containerID", "") } @@ -48,7 +47,6 @@ func ValidateContainerID(containerID string) *types.Error { // ValidateNetworkName will validate that the supplied networkName does not contain invalid characters func ValidateNetworkName(networkName string) *types.Error { - if networkName == "" { return types.NewError(types.ErrInvalidNetworkConfig, "missing network name:", "") } @@ -58,11 +56,11 @@ func ValidateNetworkName(networkName string) *types.Error { return nil } -// ValidateInterfaceName will 
validate the interface name based on the three rules below +// ValidateInterfaceName will validate the interface name based on the four rules below // 1. The name must not be empty // 2. The name must be less than 16 characters // 3. The name must not be "." or ".." -// 3. The name must not contain / or : or any whitespace characters +// 4. The name must not contain / or : or any whitespace characters // ref to https://github.com/torvalds/linux/blob/master/net/core/dev.c#L1024 func ValidateInterfaceName(ifName string) *types.Error { if len(ifName) == 0 { diff --git a/go-controller/vendor/github.com/containernetworking/cni/pkg/version/plugin.go b/go-controller/vendor/github.com/containernetworking/cni/pkg/version/plugin.go index 17b22b6b0c..e3bd375bca 100644 --- a/go-controller/vendor/github.com/containernetworking/cni/pkg/version/plugin.go +++ b/go-controller/vendor/github.com/containernetworking/cni/pkg/version/plugin.go @@ -142,3 +142,27 @@ func GreaterThanOrEqualTo(version, otherVersion string) (bool, error) { } return false, nil } + +// GreaterThan returns true if the first version is greater than the second +func GreaterThan(version, otherVersion string) (bool, error) { + firstMajor, firstMinor, firstMicro, err := ParseVersion(version) + if err != nil { + return false, err + } + + secondMajor, secondMinor, secondMicro, err := ParseVersion(otherVersion) + if err != nil { + return false, err + } + + if firstMajor > secondMajor { + return true, nil + } else if firstMajor == secondMajor { + if firstMinor > secondMinor { + return true, nil + } else if firstMinor == secondMinor && firstMicro > secondMicro { + return true, nil + } + } + return false, nil +} diff --git a/go-controller/vendor/github.com/containernetworking/cni/pkg/version/version.go b/go-controller/vendor/github.com/containernetworking/cni/pkg/version/version.go index 1326f8038e..a4d442c8ec 100644 --- a/go-controller/vendor/github.com/containernetworking/cni/pkg/version/version.go +++ b/go-controller/vendor/github.com/containernetworking/cni/pkg/version/version.go @@ -19,13 +19,12 @@ import ( "fmt" "github.com/containernetworking/cni/pkg/types" - types100 "github.com/containernetworking/cni/pkg/types/100" "github.com/containernetworking/cni/pkg/types/create" ) // Current reports the version of the CNI spec implemented by this library func Current() string { - return types100.ImplementedSpecVersion + return "1.1.0" } // Legacy PluginInfo describes a plugin that is backwards compatible with the @@ -35,8 +34,10 @@ func Current() string { // // Any future CNI spec versions which meet this definition should be added to // this list. 
-var Legacy = PluginSupports("0.1.0", "0.2.0") -var All = PluginSupports("0.1.0", "0.2.0", "0.3.0", "0.3.1", "0.4.0", "1.0.0") +var ( + Legacy = PluginSupports("0.1.0", "0.2.0") + All = PluginSupports("0.1.0", "0.2.0", "0.3.0", "0.3.1", "0.4.0", "1.0.0", "1.1.0") +) // VersionsFrom returns a list of versions starting from min, inclusive func VersionsStartingFrom(min string) PluginInfo { diff --git a/go-controller/vendor/github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1/types.go b/go-controller/vendor/github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1/types.go index 7e202ed8d0..042a5867e0 100644 --- a/go-controller/vendor/github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1/types.go +++ b/go-controller/vendor/github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1/types.go @@ -2,9 +2,9 @@ package v1 import ( "encoding/json" - "errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "net" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) // +genclient @@ -107,6 +107,7 @@ type NetworkStatus struct { Interface string `json:"interface,omitempty"` IPs []string `json:"ips,omitempty"` Mac string `json:"mac,omitempty"` + Mtu int `json:"mtu,omitempty"` Default bool `json:"default,omitempty"` DNS DNS `json:"dns,omitempty"` DeviceInfo *DeviceInfo `json:"device-info,omitempty"` @@ -176,9 +177,6 @@ func (nse *NetworkSelectionElement) UnmarshalJSON(b []byte) error { if err := json.Unmarshal(b, &netSelectionElement); err != nil { return err } - if len(netSelectionElement.IPRequest) > 0 && netSelectionElement.IPAMClaimReference != "" { - return TooManyIPSources - } *nse = NetworkSelectionElement(netSelectionElement) return nil } @@ -197,5 +195,3 @@ type NoK8sNetworkError struct { } func (e *NoK8sNetworkError) Error() string { return string(e.Message) } - -var TooManyIPSources = errors.New("cannot provide a static IP and a reference of an IPAM claim in the same network selection element") diff --git a/go-controller/vendor/github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/utils/net-attach-def.go b/go-controller/vendor/github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/utils/net-attach-def.go index 4bca1645fb..acebe13a4a 100644 --- a/go-controller/vendor/github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/utils/net-attach-def.go +++ b/go-controller/vendor/github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/utils/net-attach-def.go @@ -122,6 +122,117 @@ func GetNetworkStatus(pod *corev1.Pod) ([]v1.NetworkStatus, error) { return netStatuses, err } +// gatewayInterfaceIndex determines the index of the first interface that has a gateway +func gatewayInterfaceIndex(ips []*cni100.IPConfig) int { + for _, ipConfig := range ips { + if ipConfig.Gateway != nil && ipConfig.Interface != nil { + return *ipConfig.Interface + } + } + return -1 +} + +// CreateNetworkStatuses creates an array of NetworkStatus from CNI result +// Not to be confused with CreateNetworkStatus (singular) +// This is the preferred method and picks up when CNI ADD results contain multiple container interfaces +func CreateNetworkStatuses(r cnitypes.Result, networkName string, defaultNetwork bool, dev *v1.DeviceInfo) ([]*v1.NetworkStatus, error) { + var networkStatuses []*v1.NetworkStatus + // indexMap is from original CNI result index to networkStatuses index + indexMap := 
make(map[int]int) + + // Convert whatever the IPAM result was into the current Result type + result, err := cni100.NewResultFromResult(r) + if err != nil { + return nil, fmt.Errorf("error converting the type.Result to cni100.Result: %v", err) + } + + if len(result.Interfaces) == 1 { + networkStatus, err := CreateNetworkStatus(r, networkName, defaultNetwork, dev) + return []*v1.NetworkStatus{networkStatus}, err + } + + // Discover default routes upfront and reuse them if necessary. + var useDefaultRoute []string + for _, route := range result.Routes { + if isDefaultRoute(route) { + useDefaultRoute = append(useDefaultRoute, route.GW.String()) + } + } + + // Same for DNS + v1dns := convertDNS(result.DNS) + + // Check for a gateway-associated interface, we'll use this later if we did to mark as the default. + gwInterfaceIdx := -1 + if defaultNetwork { + gwInterfaceIdx = gatewayInterfaceIndex(result.IPs) + } + + // Initialize NetworkStatus for each container interface (e.g. with sandbox present) + indexOfFoundPodInterface := 0 + foundFirstSandboxIface := false + didSetDefault := false + for i, iface := range result.Interfaces { + if iface.Sandbox != "" { + isDefault := false + + // If there's a gateway listed for this interface index found in the ips, we mark that interface as default + // notably, we use the first one we find. + if defaultNetwork && i == gwInterfaceIdx && !didSetDefault { + isDefault = true + didSetDefault = true + } + + // Otherwise, if we didn't find it, we use the first sandbox interface. + if defaultNetwork && gwInterfaceIdx == -1 && !foundFirstSandboxIface { + isDefault = true + foundFirstSandboxIface = true + } + + ns := &v1.NetworkStatus{ + Name: networkName, + Default: isDefault, + Interface: iface.Name, + Mac: iface.Mac, + Mtu: iface.Mtu, + IPs: []string{}, + Gateway: useDefaultRoute, + DeviceInfo: dev, + DNS: *v1dns, + } + networkStatuses = append(networkStatuses, ns) + // Map original index to the new slice index + indexMap[i] = indexOfFoundPodInterface + indexOfFoundPodInterface++ + } + } + + var defaultNetworkStatus *v1.NetworkStatus + if len(networkStatuses) > 0 { + // Set the default network status to the last network status. + defaultNetworkStatus = networkStatuses[len(networkStatuses)-1] + } + + // Map IPs to network interface based on index + for _, ipConfig := range result.IPs { + if ipConfig.Interface != nil { + originalIndex := *ipConfig.Interface + if newIndex, ok := indexMap[originalIndex]; ok { + ns := networkStatuses[newIndex] + ns.IPs = append(ns.IPs, ipConfig.Address.IP.String()) + } + } else { + // If the IPs don't specify the interface assign the IP to the default network status. This keeps the behaviour + // consistent with previous multus versions. 
+ if defaultNetworkStatus != nil { + defaultNetworkStatus.IPs = append(defaultNetworkStatus.IPs, ipConfig.Address.IP.String()) + } + } + } + + return networkStatuses, nil +} + // CreateNetworkStatus create NetworkStatus from CNI result func CreateNetworkStatus(r cnitypes.Result, networkName string, defaultNetwork bool, dev *v1.DeviceInfo) (*v1.NetworkStatus, error) { netStatus := &v1.NetworkStatus{} @@ -139,6 +250,7 @@ func CreateNetworkStatus(r cnitypes.Result, networkName string, defaultNetwork b if ifs.Sandbox != "" { netStatus.Interface = ifs.Name netStatus.Mac = ifs.Mac + netStatus.Mtu = ifs.Mtu } } diff --git a/go-controller/vendor/modules.txt b/go-controller/vendor/modules.txt index 7636490960..7564f7a89c 100644 --- a/go-controller/vendor/modules.txt +++ b/go-controller/vendor/modules.txt @@ -56,10 +56,11 @@ github.com/cespare/xxhash/v2 # github.com/containerd/cgroups v1.1.0 ## explicit; go 1.17 github.com/containerd/cgroups/stats/v1 -# github.com/containernetworking/cni v1.1.2 -## explicit; go 1.14 +# github.com/containernetworking/cni v1.2.3 +## explicit; go 1.21 github.com/containernetworking/cni/libcni github.com/containernetworking/cni/pkg/invoke +github.com/containernetworking/cni/pkg/ns github.com/containernetworking/cni/pkg/skel github.com/containernetworking/cni/pkg/types github.com/containernetworking/cni/pkg/types/020 @@ -229,7 +230,7 @@ github.com/k8snetworkplumbingwg/multi-networkpolicy/pkg/client/informers/externa github.com/k8snetworkplumbingwg/multi-networkpolicy/pkg/client/informers/externalversions/k8s.cni.cncf.io/v1beta2 github.com/k8snetworkplumbingwg/multi-networkpolicy/pkg/client/listers/k8s.cni.cncf.io/v1beta1 github.com/k8snetworkplumbingwg/multi-networkpolicy/pkg/client/listers/k8s.cni.cncf.io/v1beta2 -# github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.6.0 +# github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.7.7 ## explicit; go 1.21 github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1 diff --git a/mkdocs.yml b/mkdocs.yml index f82f75c977..82ce0965e0 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -110,6 +110,8 @@ nav: - AdminPolicyBasedExternalRoutes: api-reference/admin-epbr-api-spec.md - UserDefinedNetwork: api-reference/userdefinednetwork-api-spec.md - Features: + - Universal Connectivity: + - UserDefinedNetwork: features/user-defined-networks/user-defined-network.md - NetworkSecurityControls: - AdminNetworkPolicy: features/network-security-controls/admin-network-policy.md - NetworkPolicy: features/network-security-controls/network-policy.md diff --git a/test/e2e/go.mod b/test/e2e/go.mod index d9d67fb0c4..f336f1c7ce 100644 --- a/test/e2e/go.mod +++ b/test/e2e/go.mod @@ -8,7 +8,7 @@ require ( github.com/google/go-cmp v0.6.0 github.com/k8snetworkplumbingwg/ipamclaims v0.5.0-alpha github.com/k8snetworkplumbingwg/multi-networkpolicy v1.0.1 - github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.6.0 + github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.7.7 github.com/onsi/ginkgo/v2 v2.22.0 github.com/onsi/gomega v1.36.1 github.com/pkg/errors v0.9.1 @@ -39,7 +39,7 @@ require ( github.com/containerd/errdefs v0.1.0 // indirect github.com/containerd/log v0.1.0 // indirect github.com/containerd/ttrpc v1.2.5 // indirect - github.com/containernetworking/cni v1.1.2 // indirect + github.com/containernetworking/cni v1.2.3 // indirect 
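For context on the network-attachment-definition-client v1.7.7 bump vendored above: the new plural helper CreateNetworkStatuses emits one NetworkStatus per sandbox interface (now carrying Mtu) and assigns each IP to the interface its index references, falling back to the default status otherwise. Below is a minimal, illustrative caller; the interface names, MACs and addresses are hypothetical, and the snippet is only a sketch of how the helper behaves, not code from this change.

package main

import (
	"fmt"
	"net"

	cni100 "github.com/containernetworking/cni/pkg/types/100"
	nadutils "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/utils"
)

// mustParseCIDR is a helper for the example only: it parses "addr/prefix" into a
// net.IPNet while keeping the host address.
func mustParseCIDR(s string) net.IPNet {
	ip, ipNet, err := net.ParseCIDR(s)
	if err != nil {
		panic(err)
	}
	ipNet.IP = ip
	return *ipNet
}

func main() {
	secondIface := 1
	// Hypothetical CNI ADD result with two container (sandbox) interfaces and one IP
	// bound to the second interface by index.
	result := &cni100.Result{
		CNIVersion: "1.0.0",
		Interfaces: []*cni100.Interface{
			{Name: "eth0", Mac: "0a:58:0a:80:00:05", Mtu: 1400, Sandbox: "/var/run/netns/sample"},
			{Name: "net1", Mac: "0a:58:0a:80:01:05", Mtu: 9000, Sandbox: "/var/run/netns/sample"},
		},
		IPs: []*cni100.IPConfig{
			{Interface: &secondIface, Address: mustParseCIDR("10.128.0.5/24")},
		},
	}
	// One NetworkStatus per sandbox interface; with no gateway in the result, the first
	// sandbox interface is marked as the default.
	statuses, err := nadutils.CreateNetworkStatuses(result, "default", true /*defaultNetwork*/, nil /*deviceInfo*/)
	if err != nil {
		panic(err)
	}
	for _, s := range statuses {
		fmt.Printf("iface=%s mac=%s mtu=%d ips=%v default=%v\n", s.Interface, s.Mac, s.Mtu, s.IPs, s.Default)
	}
}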
github.com/coreos/go-iptables v0.6.0 // indirect github.com/coreos/go-json v0.0.0-20230131223807-18775e0fb4fb // indirect github.com/coreos/go-semver v0.3.1 // indirect diff --git a/test/e2e/go.sum b/test/e2e/go.sum index 900d7aa612..ce895b56ed 100644 --- a/test/e2e/go.sum +++ b/test/e2e/go.sum @@ -95,8 +95,8 @@ github.com/containerd/ttrpc v1.2.5 h1:IFckT1EFQoFBMG4c3sMdT8EP3/aKfumK1msY+Ze4oL github.com/containerd/ttrpc v1.2.5/go.mod h1:YCXHsb32f+Sq5/72xHubdiJRQY9inL4a4ZQrAbN1q9o= github.com/containerd/typeurl/v2 v2.2.0 h1:6NBDbQzr7I5LHgp34xAXYF5DOTQDn05X58lsPEmzLso= github.com/containerd/typeurl/v2 v2.2.0/go.mod h1:8XOOxnyatxSWuG8OfsZXVnAF4iZfedjS/8UHSPJnX4g= -github.com/containernetworking/cni v1.1.2 h1:wtRGZVv7olUHMOqouPpn3cXJWpJgM6+EUl31EQbXALQ= -github.com/containernetworking/cni v1.1.2/go.mod h1:sDpYKmGVENF3s6uvMvGgldDWeG8dMxakj/u+i9ht9vw= +github.com/containernetworking/cni v1.2.3 h1:hhOcjNVUQTnzdRJ6alC5XF+wd9mfGIUaj8FuJbEslXM= +github.com/containernetworking/cni v1.2.3/go.mod h1:DuLgF+aPd3DzcTQTtp/Nvl1Kim23oFKdm2okJzBQA5M= github.com/containernetworking/plugins v1.2.0 h1:SWgg3dQG1yzUo4d9iD8cwSVh1VqI+bP7mkPDoSfP9VU= github.com/containernetworking/plugins v1.2.0/go.mod h1:/VjX4uHecW5vVimFa1wkG4s+r/s9qIfPdqlLF4TW8c4= github.com/coreos/butane v0.18.0 h1:WDeUC/dX1MUUVPwiqsQetQZsShNKk+2lrRXlC4ZhnZA= @@ -335,8 +335,8 @@ github.com/k8snetworkplumbingwg/ipamclaims v0.5.0-alpha h1:b3iHeks/KTzhG2dNanaUZ github.com/k8snetworkplumbingwg/ipamclaims v0.5.0-alpha/go.mod h1:MGaMX1tJ7MlHDee4/xmqp3guQh+eDiuCLAauqD9K11Q= github.com/k8snetworkplumbingwg/multi-networkpolicy v1.0.1 h1:Egj1hEVYNXWFlKpgzAXxe/2o8VNiVcAJLrKzlinILQo= github.com/k8snetworkplumbingwg/multi-networkpolicy v1.0.1/go.mod h1:kEJ4WM849yNmXekuSXLRwb+LaZ9usC06O8JgoAIq+f4= -github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.6.0 h1:BT3ghAY0q7lWib9rz+tVXDFkm27dJV6SLCn7TunZwo4= -github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.6.0/go.mod h1:wxt2YWRVItDtaQmVSmaN5ubE2L1c9CiNoHQwSJnM8Ko= +github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.7.7 h1:z4P744DR+PIpkjwXSEc6TvN3L6LVzmUquFgmNm8wSUc= +github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.7.7/go.mod h1:CM7HAH5PNuIsqjMN0fGc1ydM74Uj+0VZFhob620nklw= github.com/k8snetworkplumbingwg/sriovnet v1.2.1-0.20230427090635-4929697df2dc h1:v6+jUd70AayPbIRgTYUNpnBLG5cBPTY0+10y80CZeMk= github.com/k8snetworkplumbingwg/sriovnet v1.2.1-0.20230427090635-4929697df2dc/go.mod h1:jyWzGe6ZtYiPq6ih6aXCOy6mZ49Y9mNyBOLBBXnli+k= github.com/karrick/godirwalk v1.17.0 h1:b4kY7nqDdioR/6qnbHQyDvmA17u5G1cZ6J+CZXwSWoI= @@ -413,7 +413,6 @@ github.com/onsi/ginkgo v1.16.4/go.mod h1:dX+/inL/fNMqNlz0e9LfyB9TswhZpCVdJM/Z6Vv github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU= github.com/onsi/ginkgo/v2 v2.0.0/go.mod h1:vw5CSIxN1JObi/U8gcbwft7ZxR2dgaR70JSE3/PpL4c= -github.com/onsi/ginkgo/v2 v2.1.3/go.mod h1:vw5CSIxN1JObi/U8gcbwft7ZxR2dgaR70JSE3/PpL4c= github.com/onsi/ginkgo/v2 v2.22.0 h1:Yed107/8DjTr0lKCNt7Dn8yQ6ybuDRQoMGrNFKzMfHg= github.com/onsi/ginkgo/v2 v2.22.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo= github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= diff --git a/test/e2e/kubevirt.go b/test/e2e/kubevirt.go index 67ab2e290a..19f628b464 100644 --- a/test/e2e/kubevirt.go +++ b/test/e2e/kubevirt.go @@ -1531,12 +1531,15 @@ fi cmd func() string } var ( - cudn 
*udnv1.ClusterUserDefinedNetwork - vm *kubevirtv1.VirtualMachine - vmi *kubevirtv1.VirtualMachineInstance - cidrIPv4 = "10.128.0.0/24" - cidrIPv6 = "2010:100:200::0/60" - restart = testCommand{ + cudn *udnv1.ClusterUserDefinedNetwork + vm *kubevirtv1.VirtualMachine + vmi *kubevirtv1.VirtualMachineInstance + cidrIPv4 = "10.128.0.0/24" + cidrIPv6 = "2010:100:200::0/60" + staticIPv4 = "10.128.0.101" + staticIPv6 = "2010:100:200::101" + staticMAC = "02:00:00:00:00:01" + restart = testCommand{ description: "restart", cmd: func() { By("Restarting vm") @@ -1619,6 +1622,29 @@ write_files: }, } + virtualMachineWithUDNAndStaticIPsAndMAC = resourceCommand{ + description: "VirtualMachine with interface binding for UDN and statics IPs and MAC", + cmd: func() string { + GinkgoHelper() + if !isPreConfiguredUdnAddressesEnabled() { + Skip("ENABLE_PRE_CONF_UDN_ADDR not configured") + } + + annotations, err := kubevirt.GenerateAddressesAnnotations("net1", filterIPs(fr.ClientSet, staticIPv4, staticIPv6)) + Expect(err).NotTo(HaveOccurred()) + + vm = fedoraWithTestToolingVM(nil /*labels*/, annotations, nil, /*nodeSelector*/ + kubevirtv1.NetworkSource{ + Pod: &kubevirtv1.PodNetwork{}, + }, userDataWithIperfServer, networkDataDualStack) + vm.Spec.Template.Spec.Domain.Devices.Interfaces[0].Bridge = nil + vm.Spec.Template.Spec.Domain.Devices.Interfaces[0].Binding = &kubevirtv1.PluginBinding{Name: "l2bridge"} + vm.Spec.Template.Spec.Domain.Devices.Interfaces[0].MacAddress = staticMAC + createVirtualMachine(vm) + return vm.Name + }, + } + virtualMachineInstance = resourceCommand{ description: "VirtualMachineInstance", cmd: func() string { @@ -1669,6 +1695,8 @@ write_files: topology udnv1.NetworkTopology role udnv1.NetworkRole ingress string + ipRequests []string + macRequest string } var ( containerNetwork = func(td testData) (infraapi.Network, error) { @@ -1817,6 +1845,12 @@ ip route add %[3]s via %[4]s step = by(vmi.Name, "Wait for addresses at the virtual machine") expectedNumberOfAddresses := len(dualCIDRs) expectedAddreses := virtualMachineAddressesFromStatus(vmi, expectedNumberOfAddresses) + if _, hasIPRequests := vmi.Annotations[kubevirt.AddressesAnnotation]; hasIPRequests { + Expect(expectedAddreses).To(ConsistOf(filterIPs(fr.ClientSet, staticIPv4, staticIPv6)), "expected addresses should be consistent with the static IPs") + } + if vmi.Spec.Domain.Devices.Interfaces[0].MacAddress != "" { + Expect(vmi.Spec.Domain.Devices.Interfaces[0].MacAddress).To(Equal(vmi.Status.Interfaces[0].MAC), "expected mac address should be consistent with the static MAC") + } expectedAddresesAtGuest := expectedAddreses testPodsIPs := podsMultusNetworkIPs(iperfServerTestPods, podNetworkStatusByNetConfigPredicate(namespace, cudn.Name, strings.ToLower(string(td.role)))) @@ -1988,6 +2022,25 @@ ip route add %[3]s via %[4]s topology: udnv1.NetworkTopologyLayer2, role: udnv1.NetworkRolePrimary, }), + Entry(nil, testData{ + resource: virtualMachineWithUDNAndStaticIPsAndMAC, + test: liveMigrate, + topology: udnv1.NetworkTopologyLayer2, + role: udnv1.NetworkRolePrimary, + }), + Entry(nil, testData{ + resource: virtualMachineWithUDNAndStaticIPsAndMAC, + test: restart, + topology: udnv1.NetworkTopologyLayer2, + role: udnv1.NetworkRolePrimary, + }), + Entry(nil, testData{ + resource: virtualMachineWithUDNAndStaticIPsAndMAC, + test: liveMigrate, + topology: udnv1.NetworkTopologyLayer2, + role: udnv1.NetworkRolePrimary, + ingress: "routed", + }), Entry(nil, testData{ resource: virtualMachineWithUDN, test: liveMigrate, @@ -2170,7 +2223,6 @@ ip 
route add %[3]s via %[4]s vmiIPv4 = "10.128.0.100/24" vmiIPv6 = "2010:100:200::100/60" vmiMAC = "0A:58:0A:80:00:64" - cidrs = []string{ipv4CIDR, ipv6CIDR} staticIPsNetworkData = func(ips []string) (string, error) { type Ethernet struct { Addresses []string `json:"addresses,omitempty"` @@ -2209,7 +2261,7 @@ chpasswd: { expire: False } selectedNodes = workerNodeList.Items Expect(selectedNodes).NotTo(BeEmpty()) - iperfServerTestPods, err = createIperfServerPods(selectedNodes, cudn.Name, cudn.Spec.Network.Localnet.Role, filterCIDRs(fr.ClientSet, cidrs...)) + iperfServerTestPods, err = createIperfServerPods(selectedNodes, cudn.Name, cudn.Spec.Network.Localnet.Role, filterCIDRs(fr.ClientSet, ipv4CIDR, ipv6CIDR)) Expect(err).NotTo(HaveOccurred()) networkData, err := staticIPsNetworkData(filterCIDRs(fr.ClientSet, vmiIPv4, vmiIPv6)) diff --git a/test/e2e/kubevirt/pod.go b/test/e2e/kubevirt/pod.go index 1293e1acc5..b6153e731a 100644 --- a/test/e2e/kubevirt/pod.go +++ b/test/e2e/kubevirt/pod.go @@ -1,6 +1,7 @@ package kubevirt import ( + "encoding/json" "fmt" infraapi "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider/api" @@ -11,6 +12,10 @@ import ( kubevirtv1 "kubevirt.io/api/core/v1" ) +const ( + AddressesAnnotation = "network.kubevirt.io/addresses" +) + func GenerateFakeVirtLauncherPod(namespace, vmName string) *corev1.Pod { return &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ @@ -48,3 +53,15 @@ kill -9 $pid } return nil } + +func GenerateAddressesAnnotations(networkName string, addresses []string) (map[string]string, error) { + staticIPs, err := json.Marshal(map[string][]string{ + networkName: addresses, + }) + if err != nil { + return nil, fmt.Errorf("failed to marshal static IPs: %w", err) + } + return map[string]string{ + AddressesAnnotation: string(staticIPs), + }, nil +} diff --git a/test/e2e/network_segmentation_default_network_annotation.go b/test/e2e/network_segmentation_default_network_annotation.go new file mode 100644 index 0000000000..11849186f5 --- /dev/null +++ b/test/e2e/network_segmentation_default_network_annotation.go @@ -0,0 +1,106 @@ +package e2e + +import ( + "context" + "encoding/json" + "fmt" + "strings" + "time" + + nadapi "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + e2epod "k8s.io/kubernetes/test/e2e/framework/pod" + + udnv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1" + udnclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/clientset/versioned" +) + +var _ = Describe("Network Segmentation: Default network multus annotation", func() { + var ( + f = wrappedTestFramework("default-network-annotation") + ) + f.SkipNamespaceCreation = true + + type testCase struct { + ips []string + mac string + } + DescribeTable("when added with static IP and MAC to a pod belonging to primary UDN", func(tc testCase) { + if !isPreConfiguredUdnAddressesEnabled() { + Skip("ENABLE_PRE_CONF_UDN_ADDR not configured") + } + tc.ips = filterCIDRs(f.ClientSet, tc.ips...) 
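The kubevirt.GenerateAddressesAnnotations helper added in test/e2e/kubevirt/pod.go above simply JSON-encodes a network-name-to-addresses map under the network.kubevirt.io/addresses key. A quick, illustrative sketch of the resulting annotation follows; it reuses the staticIPv4/staticIPv6 values from the kubevirt e2e test and assumes the e2e module import path, so treat it as a sketch rather than code from this change.

package main

import (
	"fmt"

	"github.com/ovn-org/ovn-kubernetes/test/e2e/kubevirt"
)

func main() {
	// Request fixed addresses for the VM's primary UDN attachment ("net1", as in the test).
	annotations, err := kubevirt.GenerateAddressesAnnotations("net1",
		[]string{"10.128.0.101", "2010:100:200::101"})
	if err != nil {
		panic(err)
	}
	// Prints: {"net1":["10.128.0.101","2010:100:200::101"]}
	fmt.Println(annotations[kubevirt.AddressesAnnotation])
}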
+ namespace, err := f.CreateNamespace(context.TODO(), f.BaseName, map[string]string{ + "e2e-framework": f.BaseName, + RequiredUDNNamespaceLabel: "", + }) + Expect(err).NotTo(HaveOccurred(), "Should create namespace for test") + f.Namespace = namespace + + // Create the UDN client using the framework's config + udnClient, err := udnclientset.NewForConfig(f.ClientConfig()) + Expect(err).NotTo(HaveOccurred(), "Should create UDN client") + + // Define the UserDefinedNetwork object + udn := &udnv1.UserDefinedNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "l2network", + Namespace: f.Namespace.Name, + }, + Spec: udnv1.UserDefinedNetworkSpec{ + Topology: udnv1.NetworkTopologyLayer2, + Layer2: &udnv1.Layer2Config{ + Role: udnv1.NetworkRolePrimary, + Subnets: filterDualStackCIDRs(f.ClientSet, []udnv1.CIDR{ + udnv1.CIDR("103.0.0.0/16"), + udnv1.CIDR("2014:100:200::0/60"), + }), + }, + }, + } + + // Create the resource in the generated namespace + By("Create a UserDefinedNetwork with Layer2 topology and wait for availability") + udn, err = udnClient.K8sV1().UserDefinedNetworks(f.Namespace.Name).Create(context.TODO(), udn, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred(), "Should create UserDefinedNetwork") + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, udn.Namespace, udn.Name), 5*time.Second, time.Second).Should(Succeed()) + + // Create the Pod in the generated namespace + By("Create a Pod with the default network annotation and wait for readiness") + ips, err := json.Marshal(tc.ips) + Expect(err).NotTo(HaveOccurred(), "Should marshal IPs for annotation") + + // Define the Pod object with the specified annotation + By("Creating the pod with the default network annotation and wait for readiness") + pod := e2epod.NewAgnhostPod(f.Namespace.Name, "static-ip-mac-pod", nil, nil, nil) + pod.Annotations = map[string]string{ + "v1.multus-cni.io/default-network": fmt.Sprintf(`[{"name":"default", "namespace":"ovn-kubernetes", "mac":%q, "ips": %s}]`, tc.mac, string(ips)), + } + pod.Spec.Containers[0].Command = []string{"sleep", "infinity"} + pod = e2epod.NewPodClient(f).CreateSync(context.TODO(), pod) + + netStatus, err := podNetworkStatus(pod, func(status nadapi.NetworkStatus) bool { + return status.Default + }) + Expect(err).NotTo(HaveOccurred(), "Should get network status from pod") + Expect(netStatus).To(HaveLen(1), "Should have one network status for the default network") + var exposedIPs []string + + // Remove the CIDR from the IPs to expose only the IPs + for _, ip := range tc.ips { + exposedIPs = append(exposedIPs, strings.Split(ip, "/")[0]) + } + Expect(netStatus[0].IPs).To(ConsistOf(exposedIPs), "Should have the IPs specified in the default network annotation") + Expect(strings.ToLower(netStatus[0].Mac)).To(Equal(strings.ToLower(tc.mac)), "Should have the MAC specified in the default network annotation") + + }, + + Entry("should create the pod with the specified static IP and MAC address", testCase{ + ips: []string{"103.0.0.3/16", "2014:100:200::3/60"}, + mac: "02:A1:B2:C3:D4:E5", + }), + ) +}) diff --git a/test/e2e/route_advertisements.go b/test/e2e/route_advertisements.go index f65dd60631..36c0c5c950 100644 --- a/test/e2e/route_advertisements.go +++ b/test/e2e/route_advertisements.go @@ -28,7 +28,6 @@ import ( "github.com/ovn-org/ovn-kubernetes/test/e2e/label" corev1 "k8s.io/api/core/v1" - v1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -626,7 +625,7 @@ var _ = 
ginkgo.DescribeTableSubtree("BGP: isolation between advertised networks" } // create host networked Pods - _, err := createPod(f, node.Name+"-hostnet-ep", node.Name, f.Namespace.Name, []string{}, map[string]string{}, func(p *v1.Pod) { + _, err := createPod(f, node.Name+"-hostnet-ep", node.Name, f.Namespace.Name, []string{}, map[string]string{}, func(p *corev1.Pod) { p.Spec.Containers[0].Args = args p.Spec.HostNetwork = true }) @@ -652,6 +651,7 @@ var _ = ginkgo.DescribeTableSubtree("BGP: isolation between advertised networks" svc.Spec.Ports = []corev1.ServicePort{{Port: 8080}} familyPolicy := corev1.IPFamilyPolicyPreferDualStack svc.Spec.IPFamilyPolicy = &familyPolicy + svc.Spec.Type = corev1.ServiceTypeNodePort svcNetA, err = f.ClientSet.CoreV1().Services(pod.Namespace).Create(context.Background(), svc, metav1.CreateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -675,6 +675,7 @@ var _ = ginkgo.DescribeTableSubtree("BGP: isolation between advertised networks" svc.Name = fmt.Sprintf("service-default") svc.Namespace = "default" svc.Spec.Selector = pod.Labels + svc.Spec.Type = corev1.ServiceTypeNodePort svcNetDefault, err = f.ClientSet.CoreV1().Services(pod.Namespace).Create(context.Background(), svc, metav1.CreateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -754,6 +755,7 @@ var _ = ginkgo.DescribeTableSubtree("BGP: isolation between advertised networks" } if svcNetDefault != nil { err = f.ClientSet.CoreV1().Services(svcNetDefault.Namespace).Delete(context.Background(), svcNetDefault.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) svcNetDefault = nil } @@ -954,11 +956,11 @@ var _ = ginkgo.DescribeTableSubtree("BGP: isolation between advertised networks" // options [mss 1360,sackOK,TS val 3006752321 ecr 0,nop,wscale 7], length 0 // 10:59:55.352404 ovn-k8s-mp87 In ifindex 186 0a:58:5d:5d:01:01 ethertype IPv4 (0x0800), length 80: (tos 0x0, ttl 63, id 57264, // offset 0, flags [DF], proto TCP (6), length 60) - // 93.93.1.5.36363 > 172.18.0.2.25022: Flags [S], cksum 0xe0b7 (correct), seq 3879759281, win 65280, + // 169.154.169.12.36363 > 172.18.0.2.25022: Flags [S], cksum 0xe0b7 (correct), seq 3879759281, win 65280, // options [mss 1360,sackOK,TS val 3006752321 ecr 0,nop,wscale 7], length 0 // 10:59:55.352461 ovn-k8s-mp87 Out ifindex 186 0a:58:5d:5d:01:02 ethertype IPv4 (0x0800), length 60: (tos 0x0, ttl 64, id 0, // offset 0, flags [DF], proto TCP (6), length 40) - // 172.18.0.2.25022 > 93.93.1.5.36363: Flags [R.], cksum 0x609d (correct), seq 0, ack 3879759282, win 0, length 0 + // 172.18.0.2.25022 > 169.154.169.12.36363: Flags [R.], cksum 0x609d (correct), seq 0, ack 3879759282, win 0, length 0 // 10:59:55.352927 319594f193d4d_3 Out ifindex 191 0a:58:5d:5d:01:02 ethertype IPv4 (0x0800), length 60: (tos 0x0, ttl 64, id 0, // offset 0, flags [DF], proto TCP (6), length 40) // 172.18.0.2.25022 > 93.93.1.5.36363: Flags [R.], cksum 0x609d (correct), seq 0, ack 1, win 0, length 0 @@ -971,25 +973,116 @@ var _ = ginkgo.DescribeTableSubtree("BGP: isolation between advertised networks" node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podsNetA[2].Spec.NodeName, metav1.GetOptions{}) framework.ExpectNoError(err) nodeIP := node.Status.Addresses[ipFamilyIndex].Address - errBool := false - out := "" - if cudnATemplate.Spec.Network.Topology == udnv1.NetworkTopologyLayer2 { - // FIXME: this should be removed once we add the SNAT for pod->node traffic - // We now permit asymmetric traffic on LGW. 
This prevents the issue from occurring with IPv6. - // However, for IPv4 LGW rp_filter is still blocking the replies. - // The situation is different on SGW as we don't allow asymmetric traffic at all, which is why IPv6 traffic fails there too. - if ipFamilyIndex == ipFamilyV4 || !isLocalGWModeEnabled() { - // FIXME: fix assymmetry in L2 UDNs - // bad behaviour: packet is coming from other node -> entering eth0 -> bretho and here kernel drops the packet since - // rp_filter is set to 1 in breth0 and there is an iprule that sends the packet to mpX interface so kernel sees the packet - // having return path different from the incoming interface. - // The SNAT to nodeIP should fix this. - // this causes curl timeout with code 28 - errBool = true - out = curlConnectionTimeoutCode - } + + clientNode, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + clientNodeIP := clientNode.Status.Addresses[ipFamilyIndex].Address + // pod -> node traffic should use the node's IP as the source for advertised UDNs. + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(hostNetworkPort)) + "/clientip", clientNodeIP, false + }), + ginkgo.Entry("UDN pod to the same node nodeport service in default network should not work", + // FIXME: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5410 + func(ipFamilyIndex int) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podsNetA[0] + // podsNetA[0] is on nodes[0]. We need the same node. Let's hit the nodeport on nodes[0]. + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[0].Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIP := node.Status.Addresses[ipFamilyIndex].Address + nodePort := svcNetDefault.Spec.Ports[0].NodePort + + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", curlConnectionTimeoutCode, true + }), + ginkgo.Entry("UDN pod to a different node nodeport service in default network should work", + func(ipFamilyIndex int) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podsNetA[0] + // podsNetA[0] is on nodes[0]. We need a different node. podNetDefault is on nodes[1]. + // The service is backed by podNetDefault. Let's hit the nodeport on nodes[2]. + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[2].Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIP := node.Status.Addresses[ipFamilyIndex].Address + nodePort := svcNetDefault.Spec.Ports[0].NodePort + + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", "", false + }), + ginkgo.Entry("UDN pod to the same node nodeport service in same UDN network should work", + func(ipFamilyIndex int) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podsNetA[0] + // The service is backed by pods in podsNetA. + // We want to hit the nodeport on the same node. + // client is on nodes[0]. Let's hit nodeport on nodes[0]. 
+ node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[0].Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIP := node.Status.Addresses[ipFamilyIndex].Address + nodePort := svcNetA.Spec.Ports[0].NodePort + + // The service can be backed by any of the pods in podsNetA, so we can't reliably check the output hostname. + // Just check that the connection is successful. + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", "", false + }), + ginkgo.Entry("UDN pod to a different node nodeport service in same UDN network should work", + func(ipFamilyIndex int) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podsNetA[0] + // The service is backed by pods in podsNetA. + // We want to hit the nodeport on a different node. + // client is on nodes[0]. Let's hit nodeport on nodes[2]. + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[2].Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIP := node.Status.Addresses[ipFamilyIndex].Address + nodePort := svcNetA.Spec.Ports[0].NodePort + + // sourceIP will be joinSubnetIP for nodeports, so only using hostname endpoint + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", "", false + }), + ginkgo.Entry("UDN pod to the same node nodeport service in different UDN network should not work", + // FIXME: This test should work: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5419 + // This traffic flow is expected to work eventually but doesn't work today on Layer3 (v4 and v6) and Layer2 (v4 only) networks. + // Reason it doesn't work today is because UDN networks don't have MAC bindings for masqueradeIPs of other networks. + // Traffic flow: UDN pod in network A -> samenode nodeIP:nodePort service of networkB + // UDN pod in networkA -> ovn-switch -> ovn-cluster-router (SNAT to masqueradeIP of networkA) -> mpX interface -> + // enters the host and hits IPTables rules to DNAT to clusterIP:Port of service of networkB. + // Then it hits the pkt_mark flows on breth0 and get's sent into networkB's patchport where it hits the GR. + // On the GR we DNAT to backend pod and SNAT to joinIP. + // Reply: Pod replies and now OVN in networkB tries to ARP for the masqueradeIP of networkA which is the source and simply + // fails as it doesn't know how to reach this masqueradeIP. + // There is also inconsistency in behaviour within Layer2 networks for how IPv4 works and how IPv6 works where the traffic + // works on ipv6 because of the flows described below. 
+ func(ipFamilyIndex int) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podsNetA[0] + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[0].Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIP := node.Status.Addresses[ipFamilyIndex].Address + nodePort := svcNetB.Spec.Ports[0].NodePort + out := curlConnectionTimeoutCode + errBool := true + if ipFamilyIndex == ipFamilyV6 && cudnATemplate.Spec.Network.Topology == udnv1.NetworkTopologyLayer2 { + // For Layer2 networks, we have these flows we add on breth0: + // cookie=0xdeff105, duration=173.245s, table=1, n_packets=0, n_bytes=0, idle_age=173, priority=14,icmp6,icmp_type=134 actions=FLOOD + // cookie=0xdeff105, duration=173.245s, table=1, n_packets=8, n_bytes=640, idle_age=4, priority=14,icmp6,icmp_type=136 actions=FLOOD + // which floods the Router Advertisement (RA, type 134) and Neighbor Advertisement (NA, type 136) + // Given on Layer2 the GR has the SNATs for both masqueradeIPs this works perfectly well and + // the networks are able to NDP for the masqueradeIPs for the other networks. + // This doesn't work on Layer3 networks since masqueradeIP SNATs are present on the ovn-cluster-router in that case. + // See the tcpdump on the issue: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5410 for more details. + out = "" + errBool = false } - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(hostNetworkPort)) + "/hostname", out, errBool + + // sourceIP will be joinSubnetIP for nodeports, so only using hostname endpoint + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", out, errBool + }), + ginkgo.Entry("UDN pod to a different node nodeport service in different UDN network should work", + func(ipFamilyIndex int) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podsNetA[0] + // The service is backed by podNetB. + // We want to hit the nodeport on a different node from the client. + // client is on nodes[0]. Let's hit nodeport on nodes[2]. 
+ node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[2].Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIP := node.Status.Addresses[ipFamilyIndex].Address + nodePort := svcNetB.Spec.Ports[0].NodePort + + // sourceIP will be joinSubnetIP for nodeports, so only using hostname endpoint + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", "", false }), ) diff --git a/test/e2e/util.go b/test/e2e/util.go index d03559e79e..ff7cfe66d1 100644 --- a/test/e2e/util.go +++ b/test/e2e/util.go @@ -32,6 +32,7 @@ import ( "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" + clientset "k8s.io/client-go/kubernetes" "k8s.io/kubernetes/test/e2e/framework" "k8s.io/kubernetes/test/e2e/framework/debug" e2ekubectl "k8s.io/kubernetes/test/e2e/framework/kubectl" @@ -1137,6 +1138,12 @@ func isCIDRIPFamilySupported(cs kubernetes.Interface, cidr string) bool { return (isIPv4Supported(cs) && !isIPv6) || (isIPv6Supported(cs) && isIPv6) } +func isIPFamilySupported(cs clientset.Interface, cidr string) bool { + ginkgo.GinkgoHelper() + isIPv6 := utilnet.IsIPv6String(cidr) + return (isIPv4Supported(cs) && !isIPv6) || (isIPv6Supported(cs) && isIPv6) +} + func isIPv4Supported(cs kubernetes.Interface) bool { v4, _ := getSupportedIPFamilies(cs) return v4 @@ -1147,6 +1154,17 @@ func isIPv6Supported(cs kubernetes.Interface) bool { return v6 } +func filterIPs(cs clientset.Interface, cidrs ...string) []string { + var supportedCIDRs []string + for _, cidr := range cidrs { + if !isIPFamilySupported(cs, cidr) { + continue + } + supportedCIDRs = append(supportedCIDRs, cidr) + } + return supportedCIDRs +} + func getSupportedIPFamilies(cs kubernetes.Interface) (bool, bool) { n, err := e2enode.GetRandomReadySchedulableNode(context.TODO(), cs) framework.ExpectNoError(err, "must fetch a Ready Node") @@ -1183,6 +1201,12 @@ func isLocalGWModeEnabled() bool { return present && val == "local" } +func isPreConfiguredUdnAddressesEnabled() bool { + ovnKubeNamespace := deploymentconfig.Get().OVNKubernetesNamespace() + val := getTemplateContainerEnv(ovnKubeNamespace, "daemonset/ovnkube-node", getNodeContainerName(), "OVN_PRE_CONF_UDN_ADDR_ENABLE") + return val == "true" +} + func singleNodePerZone() bool { if singleNodePerZoneResult == nil { args := []string{"get", "pods", "--selector=app=ovnkube-node", "-o", "jsonpath={.items[0].spec.containers[*].name}"}