diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index ff8ca1f6f3..612abb681e 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -14,7 +14,7 @@ env: REPOSITORY: ovn-kubernetes FEDORA_IMAGE_NAME: ovn-kube-fedora UBUNTU_IMAGE_NAME: ovn-kube-ubuntu - + BUILDER_IMAGE: quay.io/lib/golang:1.24 jobs: build: name: Build Images @@ -84,9 +84,11 @@ jobs: uses: docker/build-push-action@v5 with: builder: ${{ steps.buildx.outputs.name }} - context: ./dist/images + context: . file: ./dist/images/Dockerfile.fedora push: true + build-args: | + BUILDER_IMAGE=${{ env.BUILDER_IMAGE }} platforms: linux/amd64,linux/arm64 tags: ${{ steps.meta-fedora.outputs.tags }} labels: ${{ steps.meta-fedora.outputs.labels }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5e84746572..b8ce5ce3d7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -24,8 +24,9 @@ env: KIND_CLUSTER_NAME: ovn KIND_INSTALL_INGRESS: true KIND_ALLOW_SYSTEM_WRITES: true - # This skips tests tagged as Serial - # Current Serial tests are not relevant for OVN + ENABLE_COREDUMPS: true + # This skips tests tagged as Serial for most lanes + # Serial tests are run in a dedicated lane PARALLEL: true # This must be a directory @@ -436,7 +437,7 @@ jobs: fail-fast: false matrix: # Valid options are: - # target: ["shard-conformance", "control-plane", "multi-homing", "multi-node-zones", "node-ip-mac-migration", "compact-mode"] + # target: ["shard-conformance", "control-plane", "multi-homing", "multi-node-zones", "node-ip-mac-migration", "compact-mode", "serial"] # shard-conformance: hybrid-overlay = multicast-enable = emptylb-enable = false # control-plane: hybrid-overlay = multicast-enable = emptylb-enable = true # ha: ["HA", "noHA"] @@ -490,6 +491,7 @@ jobs: - {"target": "bgp-loose-isolation", "ha": "noHA", "gateway-mode": "shared", "ipfamily": "dualstack", "disable-snat-multiple-gws": "snatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "routeadvertisements": "advertise-default", "network-segmentation": "enable-network-segmentation", "advertised-udn-isolation-mode": "loose"} - {"target": "traffic-flow-test-only","ha": "noHA", "gateway-mode": "shared", "ipfamily": "ipv4", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "traffic-flow-tests": "1-24", "network-segmentation": "enable-network-segmentation"} - {"target": "tools", "ha": "noHA", "gateway-mode": "local", "ipfamily": "dualstack", "disable-snat-multiple-gws": "SnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "network-segmentation": "enable-network-segmentation"} + - {"target": "serial", "ha": "noHA", "gateway-mode": "shared", "ipfamily": "ipv4", "disable-snat-multiple-gws": "snatGW", "second-bridge": "1br", "ic": "ic-single-node-zones"} needs: [ build-pr ] env: JOB_NAME: "${{ matrix.target }}-${{ matrix.ha }}-${{ matrix.gateway-mode }}-${{ matrix.ipfamily }}-${{ matrix.disable-snat-multiple-gws }}-${{ matrix.second-bridge }}-${{ matrix.ic }}" @@ -519,7 +521,10 @@ jobs: ENABLE_ROUTE_ADVERTISEMENTS: "${{ matrix.routeadvertisements != '' }}" ADVERTISE_DEFAULT_NETWORK: "${{ matrix.routeadvertisements == 'advertise-default' }}" ENABLE_PRE_CONF_UDN_ADDR: "${{ matrix.ic == 'ic-single-node-zones' && (matrix.target == 'network-segmentation' || matrix.network-segmentation == 'enable-network-segmentation') }}" + ENABLE_NETWORK_CONNECT: "${{ matrix.target == 'network-segmentation' }}" ADVERTISED_UDN_ISOLATION_MODE: "${{ 
matrix.advertised-udn-isolation-mode }}" + # Override PARALLEL=true for Serial tests target to run Serial tests + PARALLEL: "${{ matrix.target != 'serial' }}" OVN_UNPRIVILEGED_MODE: "${{ matrix.cni-mode == 'unprivileged' }}" MULTI_POD_SUBNET: true steps: @@ -676,6 +681,9 @@ jobs: make -C test control-plane WHAT="ClusterNetworkConnect" elif [ "${{ matrix.target }}" == "bgp" ] || [ "${{ matrix.target }}" == "bgp-loose-isolation" ]; then make -C test control-plane + elif [ "${{ matrix.target }}" == "serial" ]; then + # Run only Serial tests with ginkgo focus + make -C test control-plane WHAT=Serial elif [ "${{ matrix.target }}" == "tools" ]; then make -C go-controller build make -C test tools diff --git a/GOVERNANCE.md b/GOVERNANCE.md index 83d8e01b47..06cac4ba3d 100644 --- a/GOVERNANCE.md +++ b/GOVERNANCE.md @@ -1,38 +1,41 @@ # ovn-kubernetes Project Governance -The ovn-kubernetes project is dedicated to creating a robust Kubernetes Networking platform built from the ground up by leveraging Open vSwitch (OVS) as the data plane, and Open Virtual Network (OVN) as the SDN Controller. The project focuses strictly on enhancing networking for the Kubernetes platform and includes a wide variety of features that are critical to enterprise and telco users. +The ovn-kubernetes project is dedicated to creating a robust Kubernetes Networking platform built from the ground up by leveraging Open vSwitch (OVS) as the data plane, and Open Virtual Network (OVN) as the SDN Controller. The project focuses strictly on enhancing networking for the Kubernetes platform and includes a wide variety of features that are critical to enterprise and telco users. This governance explains how the project is run. - [Values](#values) - [Maintainers](#maintainers) -- [Becoming a Maintainer](#becoming-a-maintainer) + - [Becoming a Maintainer](#becoming-a-maintainer) + - [Removing a Maintainer](#removing-a-maintainer) +- [Members](#members) + - [Becoming a Member](#becoming-a-member) + - [Removing a Member](#removing-a-member) - [Meetings](#meetings) -- [CNCF Resources](#cncf-resources) -- [Code of Conduct Enforcement](#code-of-conduct) +- [Code of Conduct](#code-of-conduct) - [Security Response Team](#security-response-team) - [Voting](#voting) -- [Modifications](#modifying-this-charter) +- [Modifying this Charter](#modifying-this-charter) ## Values The ovn-kubernetes and its leadership embrace the following values: -* Openness: Communication and decision-making happens in the open and is discoverable for future +- Openness: Communication and decision-making happens in the open and is discoverable for future reference. As much as possible, all discussions and work take place in public forums and open repositories. -* Fairness: All stakeholders have the opportunity to provide feedback and submit +- Fairness: All stakeholders have the opportunity to provide feedback and submit contributions, which will be considered on their merits. -* Community over Product or Company: Sustaining and growing our community takes +- Community over Product or Company: Sustaining and growing our community takes priority over shipping code or sponsors' organizational goals. Each contributor participates in the project as an individual. -* Inclusivity: We innovate through different perspectives and skill sets, which +- Inclusivity: We innovate through different perspectives and skill sets, which can only be accomplished in a welcoming and respectful environment. 
-* Participation: Responsibilities within the project are earned through +- Participation: Responsibilities within the project are earned through participation, and there is a clear path up the contributor ladder into leadership positions. @@ -60,15 +63,15 @@ is the governing body for the project. To become a Maintainer you need to demonstrate the following: - * commitment to the project: - * participate in discussions, contributions, code and documentation reviews - for 10 months or more, - * perform reviews for 10 non-trivial pull requests, - * contribute 15 non-trivial pull requests and have them merged, - * ability to write quality code and/or documentation, - * ability to collaborate with the team, - * understanding of how the team works (policies, processes for testing and code review, etc), - * understanding of the project's code base and coding and documentation style. +- commitment to the project: + - participate in discussions, contributions, code and documentation reviews + for 10 months or more, + - perform reviews for 10 non-trivial pull requests, + - contribute 15 non-trivial pull requests and have them merged, +- ability to write quality code and/or documentation, +- ability to collaborate with the team, +- understanding of how the team works (policies, processes for testing and code review, etc), +- understanding of the project's code base and coding and documentation style. A new Maintainer must be proposed by an existing maintainer by sending a message to the [developer mailing list](https://groups.google.com/g/ovn-kubernetes). A simple majority vote of existing Maintainers @@ -94,6 +97,46 @@ Depending on the reason for removal, a Maintainer may be converted to Emeritus status. Emeritus Maintainers will still be consulted on some project matters, and can be rapidly returned to Maintainer status if their availability changes. +## Members + +Members are active contributors who have shown a commitment to the project. They +have privileges to review pull requests and are part of the +`ovn-kubernetes/ovn-kubernetes-members` GitHub team, which makes them eligible +for automatic PR review assignments. Members are not Maintainers, but they are +expected to contribute to the project and collaborate with the team. + +### Becoming a Member + +To become a Member, you need to demonstrate the following: +- commitment to the project: + - participate in discussions, contributions, code and documentation reviews + for 3 months or more, + - perform reviews for 5 non-trivial pull requests, + - contribute 10 non-trivial pull requests and have them merged, +- ability to write quality code and/or documentation, +- ability to collaborate with the team (e.g., participate in project meetings, + join discussion in the CNCF slack channel, etc.), +- understanding of how the team works (policies, processes for testing and + code review, etc), +- understanding of the project's code base and coding and documentation style. + +A new Member must be proposed by an existing maintainer by sending a message to +the developer mailing list. The application is approved with two affirmative +votes from current maintainers. + +### Removing a Member + +Members may resign at any time. + +Members may also be removed after being inactive for a period of 6 months or +more, for failure to fulfill their responsibilities, or for violating the Code +of Conduct. A Member may be removed at any time by a simple majority vote of the +maintainers. 
+ +Members who are consistently unresponsive to assigned PR reviews may be +contacted by Maintainers to discuss their availability and commitment. If the +pattern of non-responsiveness continues, the Member may be removed. + ## Meetings Time zones permitting, Maintainers are expected to participate in the public diff --git a/contrib/kind-common b/contrib/kind-common index 183611a353..208f6965c6 100644 --- a/contrib/kind-common +++ b/contrib/kind-common @@ -857,7 +857,10 @@ install_ffr_k8s() { echo "Attempting to reach frr-k8s webhook" kind export kubeconfig --name ovn while true; do -$OCI_BIN exec ovn-control-plane curl -ksS --connect-timeout 0.1 https://$(kubectl get svc -n frr-k8s-system frr-k8s-webhook-service -o jsonpath='{.spec.clusterIP}') +CLUSTER_IP=\$(kubectl get svc -n frr-k8s-system frr-k8s-webhook-service -o jsonpath='{.spec.clusterIP}') +# Wrap IPv6 addresses in brackets for URL syntax +[[ \${CLUSTER_IP} =~ : ]] && CLUSTER_IP="[\${CLUSTER_IP}]" +$OCI_BIN exec ovn-control-plane curl -ksS --connect-timeout 0.1 https://\${CLUSTER_IP} [ \$? -eq 0 ] && exit 0 echo "Couldn't reach frr-k8s webhook, trying in 1s..." sleep 1s @@ -916,3 +919,29 @@ interconnect_arg_check() { echo "INFO: Interconnect mode is now the default mode, you do not need to use pass -ic or --enable-interconnect anymore" fi } + +setup_coredumps() { + # Setup core dump collection + # + # Core dumps will be saved on the HOST at /tmp/kind/logs/coredumps (not inside containers) + # because kernel.core_pattern is a kernel-level setting shared across all containers. + # + # - Using a pipe instead of a file path avoids needing to mount + # /tmp/kind/logs/coredumps into every container that might crash + # - The pipe executes in the host's namespace, so /tmp/kind/logs/coredumps + # automatically refers to the host path + # + # Location: /tmp/kind/logs is used to ensure coredumps are exported in CI + # Use container exec to avoid asking for root permissions + + mkdir -p "/tmp/kind/logs/coredumps" + ulimit -c unlimited + for node in $(kind get nodes --name "${KIND_CLUSTER_NAME}"); do + # Core dump filename pattern variables: + # %P - global PID + # %e - executable filename + # %h - hostname (container hostname) + # %s - signal number that caused dump + ${OCI_BIN} exec "$node" sysctl -w kernel.core_pattern="|/bin/dd of=/tmp/kind/logs/coredumps/core.%P.%e.%h.%s bs=1M status=none" + done +} diff --git a/contrib/kind-helm.sh b/contrib/kind-helm.sh index 3e4ba8942c..a6dc8d9c9c 100755 --- a/contrib/kind-helm.sh +++ b/contrib/kind-helm.sh @@ -27,6 +27,7 @@ set_default_params() { export KIND_REMOVE_TAINT=${KIND_REMOVE_TAINT:-true} export ENABLE_MULTI_NET=${ENABLE_MULTI_NET:-false} export ENABLE_NETWORK_SEGMENTATION=${ENABLE_NETWORK_SEGMENTATION:-false} + export ENABLE_NETWORK_CONNECT=${ENABLE_NETWORK_CONNECT:-false} export ENABLE_PRE_CONF_UDN_ADDR=${ENABLE_PRE_CONF_UDN_ADDR:-false} export OVN_NETWORK_QOS_ENABLE=${OVN_NETWORK_QOS_ENABLE:-false} export KIND_NUM_WORKER=${KIND_NUM_WORKER:-2} @@ -88,6 +89,7 @@ set_default_params() { export OVN_ENABLE_DNSNAMERESOLVER=${OVN_ENABLE_DNSNAMERESOLVER:-false} export MULTI_POD_SUBNET=${MULTI_POD_SUBNET:-false} + export ENABLE_COREDUMPS=${ENABLE_COREDUMPS:-false} } usage() { @@ -104,12 +106,14 @@ usage() { echo " [ -ikv | --install-kubevirt ]" echo " [ -mne | --multi-network-enable ]" echo " [ -nse | --network-segmentation-enable ]" + echo " [ -nce | --network-connect-enable ]" echo " [ -uae | --preconfigured-udn-addresses-enable ]" echo " [ -nqe | --network-qos-enable ]" echo " [ -wk | 
--num-workers ]" echo " [ -ic | --enable-interconnect]" echo " [ -npz | --node-per-zone ]" echo " [ -cn | --cluster-name ]" + echo " [ --enable-coredumps ]" echo " [ -h ]" echo "" echo "--delete Delete current cluster" @@ -127,11 +131,13 @@ usage() { echo "-ikv | --install-kubevirt Install kubevirt" echo "-mne | --multi-network-enable Enable multi networks. DEFAULT: Disabled" echo "-nse | --network-segmentation-enable Enable network segmentation. DEFAULT: Disabled" + echo "-nce | --network-connect-enable Enable network connect (requires network segmentation). DEFAULT: Disabled" echo "-uae | --preconfigured-udn-addresses-enable Enable connecting workloads with preconfigured network to user-defined networks. DEFAULT: Disabled" echo "-nqe | --network-qos-enable Enable network QoS. DEFAULT: Disabled" echo "-ha | --ha-enabled Enable high availability. DEFAULT: HA Disabled" echo "-wk | --num-workers Number of worker nodes. DEFAULT: 2 workers" echo "-cn | --cluster-name Configure the kind cluster's name" + echo "--enable-coredumps Enable coredump collection on kind nodes. DEFAULT: Disabled" echo "-dns | --enable-dnsnameresolver Enable DNSNameResolver for resolving the DNS names used in the DNS rules of EgressFirewall." echo "-ce | --enable-central Deploy with OVN Central (Legacy Architecture)" echo "-npz | --nodes-per-zone Specify number of nodes per zone (Default 0, which means global zone; >0 means interconnect zone, where 1 for single-node zone, >1 for multi-node zone). If this value > 1, then (total k8s nodes (workers + 1) / num of nodes per zone) should be zero." @@ -176,6 +182,8 @@ parse_args() { ;; -nse | --network-segmentation-enable) ENABLE_NETWORK_SEGMENTATION=true ;; + -nce | --network-connect-enable ) ENABLE_NETWORK_CONNECT=true + ;; -uae | --preconfigured-udn-addresses-enable) ENABLE_PRE_CONF_UDN_ADDR=true ;; -nqe | --network-qos-enable ) OVN_NETWORK_QOS_ENABLE=true @@ -214,6 +222,8 @@ parse_args() { ;; -mps| --multi-pod-subnet ) MULTI_POD_SUBNET=true ;; + --enable-coredumps ) ENABLE_COREDUMPS=true + ;; * ) usage exit 1 esac @@ -244,6 +254,7 @@ print_params() { echo "KIND_REMOVE_TAINT = $KIND_REMOVE_TAINT" echo "ENABLE_MULTI_NET = $ENABLE_MULTI_NET" echo "ENABLE_NETWORK_SEGMENTATION = $ENABLE_NETWORK_SEGMENTATION" + echo "ENABLE_NETWORK_CONNECT = $ENABLE_NETWORK_CONNECT" echo "ENABLE_PRE_CONF_UDN_ADDR = $ENABLE_PRE_CONF_UDN_ADDR" echo "OVN_NETWORK_QOS_ENABLE = $OVN_NETWORK_QOS_ENABLE" echo "OVN_IMAGE = $OVN_IMAGE" @@ -295,22 +306,9 @@ build_ovn_image() { return fi - # Build ovn image - pushd ${DIR}/../go-controller - make - popd - # Build ovn kube image pushd ${DIR}/../dist/images - # Find all built executables, but ignore the 'windows' directory if it exists - find ../../go-controller/_output/go/bin/ -maxdepth 1 -type f -exec cp -f {} . \; - echo "ref: $(git rev-parse --symbolic-full-name HEAD) commit: $(git rev-parse HEAD)" > git_info - $OCI_BIN build \ - --build-arg http_proxy="$http_proxy" \ - --build-arg https_proxy="$https_proxy" \ - --network=host \ - -t "${OVN_IMAGE}" \ - -f Dockerfile.fedora . + make fedora-image popd } @@ -461,12 +459,14 @@ helm install ovn-kubernetes . 
-f "${value_file}" \ --set global.enableMulticast=$(if [ "${OVN_MULTICAST_ENABLE}" == "true" ]; then echo "true"; else echo "false"; fi) \ --set global.enableMultiNetwork=$(if [ "${ENABLE_MULTI_NET}" == "true" ]; then echo "true"; else echo "false"; fi) \ --set global.enableNetworkSegmentation=$(if [ "${ENABLE_NETWORK_SEGMENTATION}" == "true" ]; then echo "true"; else echo "false"; fi) \ + --set global.enableNetworkConnect=$(if [ "${ENABLE_NETWORK_CONNECT}" == "true" ]; then echo "true"; else echo "false"; fi) \ --set global.enablePreconfiguredUDNAddresses=$(if [ "${ENABLE_PRE_CONF_UDN_ADDR}" == "true" ]; then echo "true"; else echo "false"; fi) \ --set global.enableHybridOverlay=$(if [ "${OVN_HYBRID_OVERLAY_ENABLE}" == "true" ]; then echo "true"; else echo "false"; fi) \ --set global.enableObservability=$(if [ "${OVN_OBSERV_ENABLE}" == "true" ]; then echo "true"; else echo "false"; fi) \ --set global.emptyLbEvents=$(if [ "${OVN_EMPTY_LB_EVENTS}" == "true" ]; then echo "true"; else echo "false"; fi) \ --set global.enableDNSNameResolver=$(if [ "${OVN_ENABLE_DNSNAMERESOLVER}" == "true" ]; then echo "true"; else echo "false"; fi) \ --set global.enableNetworkQos=$(if [ "${OVN_NETWORK_QOS_ENABLE}" == "true" ]; then echo "true"; else echo "false"; fi) \ + --set global.enableCoredumps=$(if [ "${ENABLE_COREDUMPS}" == "true" ]; then echo "true"; else echo "false"; fi) \ ${ovnkube_db_options} EOF ) @@ -495,6 +495,9 @@ print_params helm_prereqs build_ovn_image create_kind_cluster +if [ "$ENABLE_COREDUMPS" == true ]; then + setup_coredumps +fi detect_apiserver_url docker_disable_ipv6 coredns_patch diff --git a/contrib/kind.sh b/contrib/kind.sh index 9ac34e4dec..5231c5381b 100755 --- a/contrib/kind.sh +++ b/contrib/kind.sh @@ -51,12 +51,14 @@ usage() { echo " [-is | --ipsec]" echo " [-cm | --compact-mode]" echo " [-ic | --enable-interconnect]" + echo " [-nce | --network-connect-enable]" echo " [-uae | --preconfigured-udn-addresses-enable]" echo " [-rae | --enable-route-advertisements]" echo " [-rud | --routed-udn-isolation-disable]" echo " [-adv | --advertise-default-network]" echo " [-nqe | --network-qos-enable]" echo " [--isolated]" + echo " [--enable-coredumps]" echo " [-dns | --enable-dnsnameresolver]" echo " [-obs | --observability]" echo " [-h]]" @@ -120,6 +122,7 @@ echo "--disable-ovnkube-identity Disable per-node cert and ov echo "-npz | --nodes-per-zone If interconnect is enabled, number of nodes per zone (Default 1). If this value > 1, then (total k8s nodes (workers + 1) / num of nodes per zone) should be zero." echo "-mtu Define the overlay mtu" echo "--isolated Deploy with an isolated environment (no default gateway)" +echo "--enable-coredumps Enable coredump collection on kind nodes. DEFAULT: Disabled." echo "--delete Delete current cluster" echo "--deploy Deploy ovn-kubernetes without restarting kind" echo "--add-nodes Adds nodes to an existing cluster. The number of nodes to be added is specified by --num-workers. Also use -ic if the cluster is using interconnect." 
@@ -309,10 +312,14 @@ parse_args() { ;; --isolated ) OVN_ISOLATED=true ;; + --enable-coredumps ) ENABLE_COREDUMPS=true + ;; -mne | --multi-network-enable ) ENABLE_MULTI_NET=true ;; -nse | --network-segmentation-enable) ENABLE_NETWORK_SEGMENTATION=true ;; + -nce | --network-connect-enable ) ENABLE_NETWORK_CONNECT=true + ;; -uae | --preconfigured-udn-addresses-enable) ENABLE_PRE_CONF_UDN_ADDR=true ;; -rae | --route-advertisements-enable) ENABLE_ROUTE_ADVERTISEMENTS=true @@ -423,6 +430,7 @@ print_params() { echo "OVN_ISOLATED = $OVN_ISOLATED" echo "ENABLE_MULTI_NET = $ENABLE_MULTI_NET" echo "ENABLE_NETWORK_SEGMENTATION= $ENABLE_NETWORK_SEGMENTATION" + echo "ENABLE_NETWORK_CONNECT = $ENABLE_NETWORK_CONNECT" echo "ENABLE_ROUTE_ADVERTISEMENTS= $ENABLE_ROUTE_ADVERTISEMENTS" echo "ADVERTISED_UDN_ISOLATION_MODE= $ADVERTISED_UDN_ISOLATION_MODE" echo "ADVERTISE_DEFAULT_NETWORK = $ADVERTISE_DEFAULT_NETWORK" @@ -677,6 +685,11 @@ set_default_params() { echo "Preconfigured UDN addresses requires interconnect to be enabled (-ic)" exit 1 fi + ENABLE_NETWORK_CONNECT=${ENABLE_NETWORK_CONNECT:-false} + if [[ $ENABLE_NETWORK_CONNECT == true && $ENABLE_NETWORK_SEGMENTATION != true ]]; then + echo "Network connect requires network-segmentation to be enabled (-nse)" + exit 1 + fi ADVERTISED_UDN_ISOLATION_MODE=${ADVERTISED_UDN_ISOLATION_MODE:-strict} ADVERTISE_DEFAULT_NETWORK=${ADVERTISE_DEFAULT_NETWORK:-false} OVN_COMPACT_MODE=${OVN_COMPACT_MODE:-false} @@ -686,6 +699,7 @@ set_default_params() { OVN_MTU=${OVN_MTU:-1400} OVN_ENABLE_DNSNAMERESOLVER=${OVN_ENABLE_DNSNAMERESOLVER:-false} OVN_OBSERV_ENABLE=${OVN_OBSERV_ENABLE:-false} + ENABLE_COREDUMPS=${ENABLE_COREDUMPS:-false} } check_ipv6() { @@ -920,6 +934,7 @@ create_ovn_kube_manifests() { --ovn-loglevel-sb="${OVN_LOG_LEVEL_SB}" \ --ovn-loglevel-controller="${OVN_LOG_LEVEL_CONTROLLER}" \ --ovnkube-libovsdb-client-logfile="${LIBOVSDB_CLIENT_LOGFILE}" \ + --enable-coredumps="${ENABLE_COREDUMPS}" \ --ovnkube-config-duration-enable=true \ --admin-network-policy-enable=true \ --egress-ip-enable=true \ @@ -936,6 +951,7 @@ create_ovn_kube_manifests() { --ex-gw-network-interface="${OVN_EX_GW_NETWORK_INTERFACE}" \ --multi-network-enable="${ENABLE_MULTI_NET}" \ --network-segmentation-enable="${ENABLE_NETWORK_SEGMENTATION}" \ + --network-connect-enable="${ENABLE_NETWORK_CONNECT}" \ --preconfigured-udn-addresses-enable="${ENABLE_PRE_CONF_UDN_ADDR}" \ --route-advertisements-enable="${ENABLE_ROUTE_ADVERTISEMENTS}" \ --advertise-default-network="${ADVERTISE_DEFAULT_NETWORK}" \ @@ -1035,7 +1051,9 @@ install_ovn() { run_kubectl apply -f k8s.ovn.org_userdefinednetworks.yaml run_kubectl apply -f k8s.ovn.org_clusteruserdefinednetworks.yaml run_kubectl apply -f k8s.ovn.org_routeadvertisements.yaml - run_kubectl apply -f k8s.ovn.org_clusternetworkconnects.yaml + if [ "$ENABLE_NETWORK_CONNECT" == true ]; then + run_kubectl apply -f k8s.ovn.org_clusternetworkconnects.yaml + fi # NOTE: When you update vendoring versions for the ANP & BANP APIs, we must update the version of the CRD we pull from in the below URL run_kubectl apply -f https://raw.githubusercontent.com/kubernetes-sigs/network-policy-api/v0.1.5/config/crd/experimental/policy.networking.k8s.io_adminnetworkpolicies.yaml run_kubectl apply -f https://raw.githubusercontent.com/kubernetes-sigs/network-policy-api/v0.1.5/config/crd/experimental/policy.networking.k8s.io_baselineadminnetworkpolicies.yaml @@ -1214,6 +1232,9 @@ check_ipv6 set_cluster_cidr_ip_families if [ "$KIND_CREATE" == true ]; then create_kind_cluster + if [ 
"$ENABLE_COREDUMPS" == true ]; then + setup_coredumps + fi if [ "$RUN_IN_CONTAINER" == true ]; then run_script_in_container fi diff --git a/dist/images/Dockerfile.fedora b/dist/images/Dockerfile.fedora index 3a3de4f8d7..a2d51a3976 100644 --- a/dist/images/Dockerfile.fedora +++ b/dist/images/Dockerfile.fedora @@ -8,14 +8,30 @@ # This is for a development build where the ovn-kubernetes utilities # are built locally and included in the image (instead of the rpm) # - ARG OVN_FROM=koji +ARG BUILDER_IMAGE +ARG OVN_KUBERNETES_DIR=. + +############################################# +# Stage to build OVN Kubernetes from Source # +############################################# + +FROM --platform=${BUILDPLATFORM} ${BUILDER_IMAGE} AS ovnkube-builder + +ARG TARGETOS +ARG TARGETARCH + +WORKDIR /workspace +RUN apt-get update && apt-get install -y -qq make git +COPY ${OVN_KUBERNETES_DIR} ovn-kubernetes +RUN cd ovn-kubernetes/dist/images && \ + CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} make bld + ############################################# # Stage to get OVN and OVS RPMs from source # ############################################# FROM quay.io/fedora/fedora:42 AS ovnbuilder - USER root ENV PYTHONDONTWRITEBYTECODE yes @@ -79,7 +95,7 @@ RUN git log -n 1 # Stage to download OVN RPMs from koji # ######################################## FROM quay.io/fedora/fedora:42 AS kojidownloader -ARG ovnver=ovn-25.09.0-42.fc42 +ARG ovnver=ovn-25.09.2-2.fc42 USER root @@ -138,24 +154,22 @@ RUN rpm -Uhv --nodeps --force /*.rpm # Built in ../../go_controller, then the binaries are copied here. # put things where they are in the pkg RUN mkdir -p /usr/libexec/cni/ -COPY ovnkube ovn-kube-util ovndbchecker hybrid-overlay-node ovnkube-identity ovnkube-observ /usr/bin/ -COPY ovn-k8s-cni-overlay /usr/libexec/cni/ovn-k8s-cni-overlay +COPY --from=ovnkube-builder /workspace/ovn-kubernetes/dist/images/ovnkube /workspace/ovn-kubernetes/dist/images/ovn-kube-util /workspace/ovn-kubernetes/dist/images/ovndbchecker /workspace/ovn-kubernetes/dist/images/hybrid-overlay-node /workspace/ovn-kubernetes/dist/images/ovnkube-identity /workspace/ovn-kubernetes/dist/images/ovnkube-observ /usr/bin/ +COPY --from=ovnkube-builder /workspace/ovn-kubernetes/dist/images/git_info /root +COPY --from=ovnkube-builder /workspace/ovn-kubernetes/dist/images/ovn-k8s-cni-overlay /usr/libexec/cni/ovn-k8s-cni-overlay # ovnkube.sh is the entry point. 
This script examines environment # variables to direct operation and configure ovn -COPY ovnkube.sh /root/ -COPY ovndb-raft-functions.sh /root/ - -# copy git commit number into image -COPY git_info /root +COPY --from=ovnkube-builder /workspace/ovn-kubernetes/dist/images/ovnkube.sh /root/ +COPY --from=ovnkube-builder /workspace/ovn-kubernetes/dist/images/ovndb-raft-functions.sh /root/ # iptables wrappers -COPY ./iptables-scripts/iptables /usr/sbin/ -COPY ./iptables-scripts/iptables-save /usr/sbin/ -COPY ./iptables-scripts/iptables-restore /usr/sbin/ -COPY ./iptables-scripts/ip6tables /usr/sbin/ -COPY ./iptables-scripts/ip6tables-save /usr/sbin/ -COPY ./iptables-scripts/ip6tables-restore /usr/sbin/ +COPY --from=ovnkube-builder /workspace/ovn-kubernetes/dist/images/iptables-scripts/iptables /usr/sbin/ +COPY --from=ovnkube-builder /workspace/ovn-kubernetes/dist/images/iptables-scripts/iptables-save /usr/sbin/ +COPY --from=ovnkube-builder /workspace/ovn-kubernetes/dist/images/iptables-scripts/iptables-restore /usr/sbin/ +COPY --from=ovnkube-builder /workspace/ovn-kubernetes/dist/images/iptables-scripts/ip6tables /usr/sbin/ +COPY --from=ovnkube-builder /workspace/ovn-kubernetes/dist/images/iptables-scripts/ip6tables-save /usr/sbin/ +COPY --from=ovnkube-builder /workspace/ovn-kubernetes/dist/images/iptables-scripts/ip6tables-restore /usr/sbin/ LABEL io.k8s.display-name="ovn-kubernetes" \ io.k8s.description="This is a Kubernetes network plugin that provides an overlay network using OVN." \ diff --git a/dist/images/Makefile b/dist/images/Makefile index 9e7397e6ab..ee62016d97 100644 --- a/dist/images/Makefile +++ b/dist/images/Makefile @@ -8,10 +8,10 @@ # The registry is configured in /etc/containers/registries.conf # on each node in both "registries:" and "insecure_registries:" sections. -all: ubuntu fedora +all: ubuntu-image fedora-image SLASH = - -ARCH = $(subst aarch64,arm64,$(subst x86_64,amd64,$(patsubst i%86,386,$(shell uname -m)))) +ARCH ?= $(subst aarch64,arm64,$(subst x86_64,amd64,$(patsubst i%86,386,$(shell uname -m)))) IMAGE_ARCH = $(SLASH)$(ARCH) DOCKERFILE_ARCH = ifeq ($(ARCH),arm64) @@ -27,7 +27,8 @@ else OVN_FROM := source OVN_GITSHA := $(shell git ls-remote "${OVN_REPO}" "${OVN_GITREF}" | sort -k2 -V |tail -1 | awk '{ print $$1 }') endif - +GO_VERSION ?= 1.24 +GO_IMAGE = quay.io/lib/golang:${GO_VERSION} OCI_BIN ?= docker @@ -45,13 +46,16 @@ ubuntu-shared-gw-deployment: ubuntu-image # ${OCI_BIN} push docker.io/ovnkube/ovn-daemonset-ubuntu:latest ./daemonset.sh --image=docker.io/ovnkube/ovn-daemonset-ubuntu:latest -fedora-image: bld +fedora-image: ${OCI_BIN} build \ --build-arg OVN_FROM=${OVN_FROM} \ --build-arg OVN_REPO=${OVN_REPO} \ --build-arg OVN_GITREF=${OVN_GITSHA} \ + --build-arg OVN_KUBERNETES_DIR=${OVN_KUBERNETES_DIR} \ + --build-arg BUILDER_IMAGE=${GO_IMAGE} \ + --platform=linux/${ARCH} \ -t ${IMAGE} \ - -f Dockerfile.fedora . + -f Dockerfile.fedora ./../.. 
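With the builder stage in the Dockerfile, `fedora-image` no longer needs a host Go toolchain. A sketch of local invocations follows; the `ARCH` and `GO_VERSION` overrides are the Makefile variables added above, while the cross-build values and the image tag are illustrative only:

```bash
# Native build using the default Go builder image.
make -C dist/images fedora-image

# Cross-build an arm64 image against a specific Go version (example values;
# needs a BuildKit/buildx-capable engine for the foreign platform).
make -C dist/images fedora-image ARCH=arm64 GO_VERSION=1.24

# Roughly what the docker.yml workflow now does, expressed as a direct build
# from the repository root (tag is illustrative).
docker build --build-arg BUILDER_IMAGE=quay.io/lib/golang:1.24 \
  -f dist/images/Dockerfile.fedora -t ovn-kube-fedora:dev .
```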
fedora-shared-gw-deployment: fedora-image # ${OCI_BIN} login -u ovnkube docker.io/ovnkube diff --git a/dist/images/daemonset.sh b/dist/images/daemonset.sh index f7b396c5b8..cb8a0aa699 100755 --- a/dist/images/daemonset.sh +++ b/dist/images/daemonset.sh @@ -68,9 +68,9 @@ OVN_EGRESSIP_HEALTHCHECK_PORT= OVN_EGRESSFIREWALL_ENABLE= OVN_EGRESSQOS_ENABLE= OVN_EGRESSSERVICE_ENABLE= -OVN_DISABLE_OVN_IFACE_ID_VER="false" OVN_MULTI_NETWORK_ENABLE= OVN_NETWORK_SEGMENTATION_ENABLE= +OVN_NETWORK_CONNECT_ENABLE= OVN_PRE_CONF_UDN_ADDR_ENABLE= OVN_ROUTE_ADVERTISEMENTS_ENABLE= OVN_ADVERTISE_DEFAULT_NETWORK= @@ -107,6 +107,7 @@ IN_UPGRADE= # northd-backoff-interval, in ms OVN_NORTHD_BACKOFF_INTERVAL= OVN_OBSERV_ENABLE="false" +ENABLE_COREDUMPS="false" # Parse parameters given as arguments to this script. while [ "$1" != "" ]; do @@ -260,9 +261,6 @@ while [ "$1" != "" ]; do --egress-ip-healthcheck-port) OVN_EGRESSIP_HEALTHCHECK_PORT=$VALUE ;; - --disabe-ovn-iface-id-ver) - OVN_DISABLE_OVN_IFACE_ID_VER=$VALUE - ;; --egress-firewall-enable) OVN_EGRESSFIREWALL_ENABLE=$VALUE ;; @@ -275,6 +273,9 @@ while [ "$1" != "" ]; do --network-segmentation-enable) OVN_NETWORK_SEGMENTATION_ENABLE=$VALUE ;; + --network-connect-enable) + OVN_NETWORK_CONNECT_ENABLE=$VALUE + ;; --preconfigured-udn-addresses-enable) OVN_PRE_CONF_UDN_ADDR_ENABLE=$VALUE ;; @@ -380,6 +381,9 @@ while [ "$1" != "" ]; do --enable-observ) OVN_OBSERV_ENABLE=$VALUE ;; + --enable-coredumps) + ENABLE_COREDUMPS=$VALUE + ;; --no-hostsubnet-label) OVN_NOHOSTSUBNET_LABEL=$VALUE ;; @@ -470,12 +474,12 @@ ovn_egress_qos_enable=${OVN_EGRESSQOS_ENABLE} echo "ovn_egress_qos_enable: ${ovn_egress_qos_enable}" ovn_egress_service_enable=${OVN_EGRESSSERVICE_ENABLE} echo "ovn_egress_service_enable: ${ovn_egress_service_enable}" -ovn_disable_ovn_iface_id_ver=${OVN_DISABLE_OVN_IFACE_ID_VER} -echo "ovn_disable_ovn_iface_id_ver: ${ovn_disable_ovn_iface_id_ver}" ovn_multi_network_enable=${OVN_MULTI_NETWORK_ENABLE} echo "ovn_multi_network_enable: ${ovn_multi_network_enable}" ovn_network_segmentation_enable=${OVN_NETWORK_SEGMENTATION_ENABLE} echo "ovn_network_segmentation_enable: ${ovn_network_segmentation_enable}" +ovn_network_connect_enable=${OVN_NETWORK_CONNECT_ENABLE} +echo "ovn_network_connect_enable: ${ovn_network_connect_enable}" ovn_pre_conf_udn_addr_enable=${OVN_PRE_CONF_UDN_ADDR_ENABLE} echo "ovn_pre_conf_udn_addr_enable: ${ovn_pre_conf_udn_addr_enable}" ovn_route_advertisements_enable=${OVN_ROUTE_ADVERTISEMENTS_ENABLE} @@ -590,6 +594,9 @@ echo "ovn_enable_dnsnameresolver: ${ovn_enable_dnsnameresolver}" ovn_observ_enable=${OVN_OBSERV_ENABLE} echo "ovn_observ_enable: ${ovn_observ_enable}" +enable_coredumps=${ENABLE_COREDUMPS} +echo "enable_coredumps: ${enable_coredumps}" + ovn_nohostsubnet_label=${OVN_NOHOSTSUBNET_LABEL} echo "ovn_nohostsubnet_label: ${ovn_nohostsubnet_label}" @@ -624,6 +631,7 @@ ovn_image=${ovnkube_image} \ ovn_egress_ip_healthcheck_port=${ovn_egress_ip_healthcheck_port} \ ovn_multi_network_enable=${ovn_multi_network_enable} \ ovn_network_segmentation_enable=${ovn_network_segmentation_enable} \ + ovn_network_connect_enable=${ovn_network_connect_enable} \ ovn_pre_conf_udn_addr_enable=${ovn_pre_conf_udn_addr_enable} \ ovn_route_advertisements_enable=${ovn_route_advertisements_enable} \ ovn_advertised_udn_isolation_mode=${ovn_advertised_udn_isolation_mode} \ @@ -642,13 +650,13 @@ ovn_image=${ovnkube_image} \ ovn_ipfix_cache_max_flows=${ovn_ipfix_cache_max_flows} \ ovn_ipfix_cache_active_timeout=${ovn_ipfix_cache_active_timeout} \ 
ovn_ex_gw_networking_interface=${ovn_ex_gw_networking_interface} \ - ovn_disable_ovn_iface_id_ver=${ovn_disable_ovn_iface_id_ver} \ ovnkube_node_mgmt_port_netdev=${ovnkube_node_mgmt_port_netdev} \ ovn_enable_interconnect=${ovn_enable_interconnect} \ ovn_enable_multi_external_gateway=${ovn_enable_multi_external_gateway} \ ovn_enable_ovnkube_identity=${ovn_enable_ovnkube_identity} \ ovn_observ_enable=${ovn_observ_enable} \ ovn_network_qos_enable=${ovn_network_qos_enable} \ + enable_coredumps=${enable_coredumps} \ ovnkube_app_name=ovnkube-node \ jinjanate ../templates/ovnkube-node.yaml.j2 -o ${output_dir}/ovnkube-node.yaml @@ -680,6 +688,7 @@ ovn_image=${ovnkube_image} \ ovn_egress_ip_healthcheck_port=${ovn_egress_ip_healthcheck_port} \ ovn_multi_network_enable=${ovn_multi_network_enable} \ ovn_network_segmentation_enable=${ovn_network_segmentation_enable} \ + ovn_network_connect_enable=${ovn_network_connect_enable} \ ovn_route_advertisements_enable=${ovn_route_advertisements_enable} \ ovn_advertised_udn_isolation_mode=${ovn_advertised_udn_isolation_mode} \ ovn_egress_service_enable=${ovn_egress_service_enable} \ @@ -697,7 +706,6 @@ ovn_image=${ovnkube_image} \ ovn_ipfix_cache_max_flows=${ovn_ipfix_cache_max_flows} \ ovn_ipfix_cache_active_timeout=${ovn_ipfix_cache_active_timeout} \ ovn_ex_gw_networking_interface=${ovn_ex_gw_networking_interface} \ - ovn_disable_ovn_iface_id_ver=${ovn_disable_ovn_iface_id_ver} \ ovnkube_node_mgmt_port_netdev=${ovnkube_node_mgmt_port_netdev} \ ovn_enable_interconnect=${ovn_enable_interconnect} \ ovn_enable_multi_external_gateway=${ovn_enable_multi_external_gateway} \ @@ -780,6 +788,7 @@ ovn_image=${ovnkube_image} \ ovn_egress_qos_enable=${ovn_egress_qos_enable} \ ovn_multi_network_enable=${ovn_multi_network_enable} \ ovn_network_segmentation_enable=${ovn_network_segmentation_enable} \ + ovn_network_connect_enable=${ovn_network_connect_enable} \ ovn_route_advertisements_enable=${ovn_route_advertisements_enable} \ ovn_advertised_udn_isolation_mode=${ovn_advertised_udn_isolation_mode} \ ovn_egress_service_enable=${ovn_egress_service_enable} \ @@ -801,6 +810,7 @@ ovn_image=${ovnkube_image} \ ovn_observ_enable=${ovn_observ_enable} \ ovn_nohostsubnet_label=${ovn_nohostsubnet_label} \ ovn_disable_requestedchassis=${ovn_disable_requestedchassis} \ + enable_coredumps=${enable_coredumps} \ jinjanate ../templates/ovnkube-master.yaml.j2 -o ${output_dir}/ovnkube-master.yaml ovn_image=${ovnkube_image} \ @@ -830,6 +840,7 @@ ovn_image=${ovnkube_image} \ ovn_egress_qos_enable=${ovn_egress_qos_enable} \ ovn_multi_network_enable=${ovn_multi_network_enable} \ ovn_network_segmentation_enable=${ovn_network_segmentation_enable} \ + ovn_network_connect_enable=${ovn_network_connect_enable} \ ovn_pre_conf_udn_addr_enable=${ovn_pre_conf_udn_addr_enable} \ ovn_route_advertisements_enable=${ovn_route_advertisements_enable} \ ovn_advertised_udn_isolation_mode=${ovn_advertised_udn_isolation_mode} \ @@ -847,6 +858,7 @@ ovn_image=${ovnkube_image} \ ovn_enable_persistent_ips=${ovn_enable_persistent_ips} \ ovn_enable_dnsnameresolver=${ovn_enable_dnsnameresolver} \ ovn_observ_enable=${ovn_observ_enable} \ + enable_coredumps=${enable_coredumps} \ jinjanate ../templates/ovnkube-control-plane.yaml.j2 -o ${output_dir}/ovnkube-control-plane.yaml ovn_image=${image} \ @@ -878,6 +890,7 @@ ovn_image=${image} \ ovn_sb_raft_port=${ovn_sb_raft_port} \ enable_ipsec=${enable_ipsec} \ ovn_northd_backoff_interval=${ovn_northd_backoff_interval} \ + enable_coredumps=${enable_coredumps} \ jinjanate 
../templates/ovnkube-db-raft.yaml.j2 -o ${output_dir}/ovnkube-db-raft.yaml ovn_image=${ovnkube_image} \ @@ -912,6 +925,7 @@ ovn_image=${ovnkube_image} \ ovn_egress_qos_enable=${ovn_egress_qos_enable} \ ovn_multi_network_enable=${ovn_multi_network_enable} \ ovn_network_segmentation_enable=${ovn_network_segmentation_enable} \ + ovn_network_connect_enable=${ovn_network_connect_enable} \ ovn_pre_conf_udn_addr_enable=${ovn_pre_conf_udn_addr_enable} \ ovn_route_advertisements_enable=${ovn_route_advertisements_enable} \ ovn_advertised_udn_isolation_mode=${ovn_advertised_udn_isolation_mode} \ @@ -931,7 +945,6 @@ ovn_image=${ovnkube_image} \ ovn_ipfix_cache_active_timeout=${ovn_ipfix_cache_active_timeout} \ ovn_ex_gw_networking_interface=${ovn_ex_gw_networking_interface} \ ovnkube_node_mgmt_port_netdev=${ovnkube_node_mgmt_port_netdev} \ - ovn_disable_ovn_iface_id_ver=${ovn_disable_ovn_iface_id_ver} \ ovnkube_master_loglevel=${master_loglevel} \ ovn_loglevel_northd=${ovn_loglevel_northd} \ ovn_loglevel_nbctld=${ovn_loglevel_nbctld} \ @@ -947,6 +960,7 @@ ovn_image=${ovnkube_image} \ ovn_enable_svc_template_support=${ovn_enable_svc_template_support} \ ovn_enable_dnsnameresolver=${ovn_enable_dnsnameresolver} \ ovn_observ_enable=${ovn_observ_enable} \ + enable_coredumps=${enable_coredumps} \ jinjanate ../templates/ovnkube-single-node-zone.yaml.j2 -o ${output_dir}/ovnkube-single-node-zone.yaml ovn_image=${ovnkube_image} \ @@ -981,6 +995,7 @@ ovn_image=${ovnkube_image} \ ovn_egress_qos_enable=${ovn_egress_qos_enable} \ ovn_multi_network_enable=${ovn_multi_network_enable} \ ovn_network_segmentation_enable=${ovn_network_segmentation_enable} \ + ovn_network_connect_enable=${ovn_network_connect_enable} \ ovn_pre_conf_udn_addr_enable=${ovn_pre_conf_udn_addr_enable} \ ovn_route_advertisements_enable=${ovn_route_advertisements_enable} \ ovn_advertised_udn_isolation_mode=${ovn_advertised_udn_isolation_mode} \ @@ -999,7 +1014,6 @@ ovn_image=${ovnkube_image} \ ovn_ipfix_cache_active_timeout=${ovn_ipfix_cache_active_timeout} \ ovn_ex_gw_networking_interface=${ovn_ex_gw_networking_interface} \ ovnkube_node_mgmt_port_netdev=${ovnkube_node_mgmt_port_netdev} \ - ovn_disable_ovn_iface_id_ver=${ovn_disable_ovn_iface_id_ver} \ ovnkube_master_loglevel=${master_loglevel} \ ovn_loglevel_northd=${ovn_loglevel_northd} \ ovn_loglevel_nbctld=${ovn_loglevel_nbctld} \ @@ -1015,6 +1029,7 @@ ovn_image=${ovnkube_image} \ ovn_enable_svc_template_support=${ovn_enable_svc_template_support} \ ovn_enable_dnsnameresolver=${ovn_enable_dnsnameresolver} \ ovn_observ_enable=${ovn_observ_enable} \ + enable_coredumps=${enable_coredumps} \ jinjanate ../templates/ovnkube-zone-controller.yaml.j2 -o ${output_dir}/ovnkube-zone-controller.yaml ovn_image=${image} \ @@ -1045,6 +1060,7 @@ ovn_image=${ovnkube_image} \ webhook_cert=$(cat "${path_prefix}.crt" | base64 -w0) \ ovn_enable_multi_node_zone=${ovn_enable_multi_node_zone} \ ovn_hybrid_overlay_enable=${ovn_hybrid_overlay_enable} \ + enable_coredumps=${enable_coredumps} \ jinjanate ../templates/ovnkube-identity.yaml.j2 -o ${output_dir}/ovnkube-identity.yaml if ${enable_ipsec}; then diff --git a/dist/images/ovnkube.sh b/dist/images/ovnkube.sh index c7b74ed670..6e71648346 100755 --- a/dist/images/ovnkube.sh +++ b/dist/images/ovnkube.sh @@ -263,12 +263,12 @@ ovn_egressfirewall_enable=${OVN_EGRESSFIREWALL_ENABLE:-false} ovn_egressqos_enable=${OVN_EGRESSQOS_ENABLE:-false} #OVN_EGRESSSERVICE_ENABLE - enable egress Service for ovn-kubernetes ovn_egressservice_enable=${OVN_EGRESSSERVICE_ENABLE:-false} 
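The new daemonset.sh options follow the existing `--flag=value` style. A sketch of rendering manifests with both features turned on is shown below; the image name is reused from the existing comments in the images Makefile, and `../yaml` as the output directory is an assumption, so use whatever `output_dir` resolves to in your checkout:

```bash
# Render manifests with network connect and core dump support enabled.
./daemonset.sh --image=docker.io/ovnkube/ovn-daemonset-ubuntu:latest \
  --network-connect-enable=true \
  --enable-coredumps=true

# The rendered pods should now carry OVN_NETWORK_CONNECT_ENABLE, and with
# enable_coredumps=true the templates add a GOTRACEBACK=crash env entry.
grep -R -E "GOTRACEBACK|OVN_NETWORK_CONNECT_ENABLE" ../yaml/
```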
-#OVN_DISABLE_OVN_IFACE_ID_VER - disable usage of the OVN iface-id-ver option -ovn_disable_ovn_iface_id_ver=${OVN_DISABLE_OVN_IFACE_ID_VER:-false} #OVN_MULTI_NETWORK_ENABLE - enable multiple network support for ovn-kubernetes ovn_multi_network_enable=${OVN_MULTI_NETWORK_ENABLE:-false} #OVN_NETWORK_SEGMENTATION_ENABLE - enable user defined primary networks for ovn-kubernetes ovn_network_segmentation_enable=${OVN_NETWORK_SEGMENTATION_ENABLE:=false} +#OVN_NETWORK_CONNECT_ENABLE - enable network connect for ovn-kubernetes +ovn_network_connect_enable=${OVN_NETWORK_CONNECT_ENABLE:=false} #OVN_PRE_CONF_UDN_ADDR_ENABLE - enable connecting workloads with custom network configuration to UDNs ovn_pre_conf_udn_addr_enable=${OVN_PRE_CONF_UDN_ADDR_ENABLE:=false} #OVN_NROUTE_ADVERTISEMENTS_ENABLE - enable route advertisements for ovn-kubernetes @@ -770,6 +770,16 @@ ovs-server() { USER_ARGS="--ovs-user=${ovs_user_id}" fi + # OVN-K marks NIC port as transient on startup when plugging it into ovs + # bridge. This is done so that on ovsdb-server resrtart, the NIC interface is + # detached from the bridge, which is necessary to restore connectivity + # through the NIC and make the node healthy. Marking the port as transient + # works only when we also start ovsdb-server with --delete-transient-ports. + # + # Note: once ovnkube is started, it will rewire the NIC port back into the + # bridge, and move IP configuration as necessary. + ovs_options="${ovs_options} --delete-transient-ports" + /usr/share/openvswitch/scripts/ovs-ctl start --no-ovs-vswitchd \ --system-id=random ${ovs_options} ${USER_ARGS} "$@" @@ -1601,6 +1611,12 @@ ovnkube-controller() { fi echo "network_segmentation_enabled_flag=${network_segmentation_enabled_flag}" + network_connect_enabled_flag= + if [[ ${ovn_network_connect_enable} == "true" ]]; then + network_connect_enabled_flag="--enable-network-connect" + fi + echo "network_connect_enabled_flag=${network_connect_enabled_flag}" + pre_conf_udn_addr_enable_flag= if [[ ${ovn_pre_conf_udn_addr_enable} == "true" ]]; then pre_conf_udn_addr_enable_flag="--enable-preconfigured-udn-addresses" @@ -1733,6 +1749,7 @@ ovnkube-controller() { ${multicast_enabled_flag} \ ${multi_network_enabled_flag} \ ${network_segmentation_enabled_flag} \ + ${network_connect_enabled_flag} \ ${pre_conf_udn_addr_enable_flag} \ ${route_advertisements_enabled_flag} \ ${advertised_udn_isolation_flag} \ @@ -1939,6 +1956,12 @@ ovnkube-controller-with-node() { fi echo "network_segmentation_enabled_flag=${network_segmentation_enabled_flag}" + network_connect_enabled_flag= + if [[ ${ovn_network_connect_enable} == "true" ]]; then + network_connect_enabled_flag="--enable-network-connect" + fi + echo "network_connect_enabled_flag=${network_connect_enabled_flag}" + pre_conf_udn_addr_enable_flag= if [[ ${ovn_pre_conf_udn_addr_enable} == "true" ]]; then pre_conf_udn_addr_enable_flag="--enable-preconfigured-udn-addresses" @@ -1963,11 +1986,6 @@ ovnkube-controller-with-node() { fi echo "egressservice_enabled_flag=${egressservice_enabled_flag}" - disable_ovn_iface_id_ver_flag= - if [[ ${ovn_disable_ovn_iface_id_ver} == "true" ]]; then - disable_ovn_iface_id_ver_flag="--disable-ovn-iface-id-ver" - fi - netflow_targets= if [[ -n ${ovn_netflow_targets} ]]; then netflow_targets="--netflow-targets ${ovn_netflow_targets}" @@ -2196,7 +2214,6 @@ ovnkube-controller-with-node() { /usr/bin/ovnkube --init-ovnkube-controller ${K8S_NODE} --init-node ${K8S_NODE} \ ${anp_enabled_flag} \ ${disable_forwarding_flag} \ - ${disable_ovn_iface_id_ver_flag} \ 
${disable_pkt_mtu_check_flag} \ ${disable_snat_multiple_gws_flag} \ ${egressfirewall_enabled_flag} \ @@ -2217,6 +2234,7 @@ ovnkube-controller-with-node() { ${multicast_enabled_flag} \ ${multi_network_enabled_flag} \ ${network_segmentation_enabled_flag} \ + ${network_connect_enabled_flag} \ ${pre_conf_udn_addr_enable_flag} \ ${route_advertisements_enabled_flag} \ ${advertised_udn_isolation_flag} \ @@ -2386,6 +2404,12 @@ ovn-cluster-manager() { fi echo "network_segmentation_enabled_flag=${network_segmentation_enabled_flag}" + network_connect_enabled_flag= + if [[ ${ovn_network_connect_enable} == "true" ]]; then + network_connect_enabled_flag="--enable-network-connect" + fi + echo "network_connect_enabled_flag=${network_connect_enabled_flag}" + pre_conf_udn_addr_enable_flag= if [[ ${ovn_pre_conf_udn_addr_enable} == "true" ]]; then pre_conf_udn_addr_enable_flag="--enable-preconfigured-udn-addresses" @@ -2464,6 +2488,7 @@ ovn-cluster-manager() { ${multicast_enabled_flag} \ ${multi_network_enabled_flag} \ ${network_segmentation_enabled_flag} \ + ${network_connect_enabled_flag} \ ${pre_conf_udn_addr_enable_flag} \ ${route_advertisements_enabled_flag} \ ${advertised_udn_isolation_flag} \ @@ -2628,11 +2653,6 @@ ovn-node() { egressservice_enabled_flag="--enable-egress-service" fi - disable_ovn_iface_id_ver_flag= - if [[ ${ovn_disable_ovn_iface_id_ver} == "true" ]]; then - disable_ovn_iface_id_ver_flag="--disable-ovn-iface-id-ver" - fi - multi_network_enabled_flag= if [[ ${ovn_multi_network_enable} == "true" ]]; then multi_network_enabled_flag="--enable-multi-network --enable-multi-networkpolicy" @@ -2643,6 +2663,12 @@ ovn-node() { network_segmentation_enabled_flag="--enable-multi-network --enable-network-segmentation" fi + network_connect_enabled_flag= + if [[ ${ovn_network_connect_enable} == "true" ]]; then + network_connect_enabled_flag="--enable-network-connect" + fi + echo "network_connect_enabled_flag=${network_connect_enabled_flag}" + pre_conf_udn_addr_enable_flag= if [[ ${ovn_pre_conf_udn_addr_enable} == "true" ]]; then pre_conf_udn_addr_enable_flag="--enable-preconfigured-udn-addresses" @@ -2871,7 +2897,6 @@ ovn-node() { /usr/bin/ovnkube --init-node ${K8S_NODE} \ ${anp_enabled_flag} \ ${disable_forwarding_flag} \ - ${disable_ovn_iface_id_ver_flag} \ ${disable_pkt_mtu_check_flag} \ ${disable_snat_multiple_gws_flag} \ ${egress_interface} \ @@ -2888,6 +2913,7 @@ ovn-node() { ${multicast_enabled_flag} \ ${multi_network_enabled_flag} \ ${network_segmentation_enabled_flag} \ + ${network_connect_enabled_flag} \ ${pre_conf_udn_addr_enable_flag} \ ${route_advertisements_enabled_flag} \ ${advertised_udn_isolation_flag} \ diff --git a/dist/templates/k8s.ovn.org_clusternetworkconnects.yaml.j2 b/dist/templates/k8s.ovn.org_clusternetworkconnects.yaml.j2 index be7e61b131..3a981d1ea1 100644 --- a/dist/templates/k8s.ovn.org_clusternetworkconnects.yaml.j2 +++ b/dist/templates/k8s.ovn.org_clusternetworkconnects.yaml.j2 @@ -133,6 +133,13 @@ spec: - message: When 2 CIDRs are set, they must be from different IP families rule: size(self) != 2 || !isCIDR(self[0].cidr) || !isCIDR(self[1].cidr) || cidr(self[0].cidr).ip().family() != cidr(self[1].cidr).ip().family() + - message: 'For dual-stack, networkPrefix must have matching host + bits: (32 - ipv4NetworkPrefix) must equal (128 - ipv6NetworkPrefix)' + rule: 'size(self) != 2 || !isCIDR(self[0].cidr) || !isCIDR(self[1].cidr) + || cidr(self[0].cidr).ip().family() == cidr(self[1].cidr).ip().family() + || (cidr(self[0].cidr).ip().family() == 4 ? 
(32 - self[0].networkPrefix) + == (128 - self[1].networkPrefix) : (128 - self[0].networkPrefix) + == (32 - self[1].networkPrefix))' connectivity: description: connectivity specifies which connectivity types should be enabled for the connected networks. diff --git a/dist/templates/k8s.ovn.org_clusteruserdefinednetworks.yaml.j2 b/dist/templates/k8s.ovn.org_clusteruserdefinednetworks.yaml.j2 index 0fec693d48..bcb4af69ff 100644 --- a/dist/templates/k8s.ovn.org_clusteruserdefinednetworks.yaml.j2 +++ b/dist/templates/k8s.ovn.org_clusteruserdefinednetworks.yaml.j2 @@ -614,6 +614,29 @@ spec: IPv6 subnet is used rule: '!has(self.subnets) || !has(self.mtu) || !self.subnets.exists_one(i, isCIDR(i) && cidr(i).ip().family() == 6) || self.mtu >= 1280' + noOverlayOptions: + description: |- + NoOverlayOptions contains configuration for no-overlay mode. + This is only allowed when Transport is "NoOverlay". + properties: + outboundSNAT: + description: OutboundSNAT defines the SNAT behavior for outbound + traffic from pods. + enum: + - Enabled + - Disabled + type: string + routing: + description: Routing specifies whether the pod network routing + is managed by OVN-Kubernetes or users. + enum: + - Managed + - Unmanaged + type: string + required: + - outboundSNAT + - routing + type: object topology: description: |- Topology describes network configuration. @@ -627,6 +650,17 @@ spec: - Layer3 - Localnet type: string + transport: + description: |- + Transport describes the transport technology for pod-to-pod traffic. + Allowed values are "NoOverlay" and "Geneve". + - "NoOverlay": The network operates in no-overlay mode. + - "Geneve": The network uses Geneve overlay. + When omitted, the default behaviour is Geneve. + enum: + - NoOverlay + - Geneve + type: string required: - topology type: object @@ -643,6 +677,16 @@ spec: forbidden otherwise rule: 'has(self.topology) && self.topology == ''Localnet'' ? 
has(self.localnet): !has(self.localnet)' + - message: transport 'NoOverlay' is only supported for Layer3 primary + networks + rule: '!has(self.transport) || self.transport != ''NoOverlay'' || + (self.topology == ''Layer3'' && has(self.layer3) && self.layer3.role + == ''Primary'')' + - message: noOverlayOptions is required when transport is 'NoOverlay' + rule: '!has(self.transport) || self.transport != ''NoOverlay'' || + has(self.noOverlayOptions)' + - message: noOverlayOptions is forbidden when transport is not 'NoOverlay' + rule: self.transport == 'NoOverlay' || !has(self.noOverlayOptions) - message: Network spec is immutable rule: self == oldSelf required: diff --git a/dist/templates/ovnkube-control-plane.yaml.j2 b/dist/templates/ovnkube-control-plane.yaml.j2 index 7d82178384..b85c3792df 100644 --- a/dist/templates/ovnkube-control-plane.yaml.j2 +++ b/dist/templates/ovnkube-control-plane.yaml.j2 @@ -84,6 +84,10 @@ spec: cpu: 100m memory: 300Mi env: + {% if (enable_coredumps | default("false")) == "true" -%} + - name: GOTRACEBACK + value: "crash" + {% endif -%} - name: OVN_DAEMONSET_VERSION value: "1.1.0" - name: OVNKUBE_LOGLEVEL @@ -146,6 +150,8 @@ spec: value: "{{ ovn_multi_network_enable }}" - name: OVN_NETWORK_SEGMENTATION_ENABLE value: "{{ ovn_network_segmentation_enable }}" + - name: OVN_NETWORK_CONNECT_ENABLE + value: "{{ ovn_network_connect_enable }}" - name: OVN_PRE_CONF_UDN_ADDR_ENABLE value: "{{ ovn_pre_conf_udn_addr_enable }}" - name: OVN_ROUTE_ADVERTISEMENTS_ENABLE diff --git a/dist/templates/ovnkube-identity.yaml.j2 b/dist/templates/ovnkube-identity.yaml.j2 index 1e1386d8f3..1c989088a9 100644 --- a/dist/templates/ovnkube-identity.yaml.j2 +++ b/dist/templates/ovnkube-identity.yaml.j2 @@ -52,6 +52,10 @@ spec: - mountPath: /etc/webhook-cert/ name: webhook-cert env: + {% if (enable_coredumps | default("false")) == "true" -%} + - name: GOTRACEBACK + value: "crash" + {% endif -%} - name: OVN_DAEMONSET_VERSION value: "1.1.0" - name: K8S_APISERVER diff --git a/dist/templates/ovnkube-master.yaml.j2 b/dist/templates/ovnkube-master.yaml.j2 index 532216da7b..cc0783ee02 100644 --- a/dist/templates/ovnkube-master.yaml.j2 +++ b/dist/templates/ovnkube-master.yaml.j2 @@ -198,6 +198,10 @@ spec: cpu: 100m memory: 300Mi env: + {% if (enable_coredumps | default("false")) == "true" -%} + - name: GOTRACEBACK + value: "crash" + {% endif -%} - name: OVN_DAEMONSET_VERSION value: "1.1.0" - name: OVNKUBE_LOGLEVEL diff --git a/dist/templates/ovnkube-node.yaml.j2 b/dist/templates/ovnkube-node.yaml.j2 index 4a81df0582..d129ca35a1 100644 --- a/dist/templates/ovnkube-node.yaml.j2 +++ b/dist/templates/ovnkube-node.yaml.j2 @@ -117,6 +117,10 @@ spec: cpu: 100m memory: 300Mi env: + {% if (enable_coredumps | default("false")) == "true" -%} + - name: GOTRACEBACK + value: "crash" + {% endif -%} - name: OVN_DAEMONSET_VERSION value: "1.1.0" - name: OVNKUBE_LOGLEVEL @@ -222,8 +226,6 @@ spec: {% if ovnkube_app_name!="ovnkube-node-dpu-host" -%} - name: OVN_SSL_ENABLE value: "{{ ovn_ssl_en }}" - - name: OVN_DISABLE_OVN_IFACE_ID_VER - value: "{{ ovn_disable_ovn_iface_id_ver }}" - name: OVN_REMOTE_PROBE_INTERVAL value: "{{ ovn_remote_probe_interval }}" - name: OVN_MONITOR_ALL @@ -240,6 +242,8 @@ spec: value: "{{ ovn_multi_network_enable }}" - name: OVN_NETWORK_SEGMENTATION_ENABLE value: "{{ ovn_network_segmentation_enable }}" + - name: OVN_NETWORK_CONNECT_ENABLE + value: "{{ ovn_network_connect_enable }}" - name: OVN_ROUTE_ADVERTISEMENTS_ENABLE value: "{{ ovn_route_advertisements_enable }}" - name: 
OVN_ADVERTISED_UDN_ISOLATION_MODE diff --git a/dist/templates/ovnkube-single-node-zone.yaml.j2 b/dist/templates/ovnkube-single-node-zone.yaml.j2 index d5d6e6dd21..866d5fe6e2 100644 --- a/dist/templates/ovnkube-single-node-zone.yaml.j2 +++ b/dist/templates/ovnkube-single-node-zone.yaml.j2 @@ -312,6 +312,10 @@ spec: cpu: 100m memory: 300Mi env: + {% if (enable_coredumps | default("false")) == "true" -%} + - name: GOTRACEBACK + value: "crash" + {% endif -%} - name: OVN_EGRESSSERVICE_ENABLE value: "{{ ovn_egress_service_enable }}" - name: OVN_DAEMONSET_VERSION @@ -422,8 +426,6 @@ spec: value: "{{ ovn_ex_gw_networking_interface }}" - name: OVN_SSL_ENABLE value: "{{ ovn_ssl_en }}" - - name: OVN_DISABLE_OVN_IFACE_ID_VER - value: "{{ ovn_disable_ovn_iface_id_ver }}" - name: OVN_REMOTE_PROBE_INTERVAL value: "{{ ovn_remote_probe_interval }}" - name: OVN_MONITOR_ALL @@ -440,6 +442,8 @@ spec: value: "{{ ovn_multi_network_enable }}" - name: OVN_NETWORK_SEGMENTATION_ENABLE value: "{{ ovn_network_segmentation_enable }}" + - name: OVN_NETWORK_CONNECT_ENABLE + value: "{{ ovn_network_connect_enable }}" - name: OVN_PRE_CONF_UDN_ADDR_ENABLE value: "{{ ovn_pre_conf_udn_addr_enable }}" - name: OVN_ROUTE_ADVERTISEMENTS_ENABLE diff --git a/dist/templates/ovnkube-zone-controller.yaml.j2 b/dist/templates/ovnkube-zone-controller.yaml.j2 index d51cb82843..150f089b56 100644 --- a/dist/templates/ovnkube-zone-controller.yaml.j2 +++ b/dist/templates/ovnkube-zone-controller.yaml.j2 @@ -286,6 +286,10 @@ spec: cpu: 100m memory: 300Mi env: + {% if (enable_coredumps | default("false")) == "true" -%} + - name: GOTRACEBACK + value: "crash" + {% endif -%} - name: OVN_DAEMONSET_VERSION value: "1.1.0" - name: OVNKUBE_LOGLEVEL @@ -345,6 +349,8 @@ spec: value: "{{ ovn_multi_network_enable }}" - name: OVN_NETWORK_SEGMENTATION_ENABLE value: "{{ ovn_network_segmentation_enable }}" + - name: OVN_NETWORK_CONNECT_ENABLE + value: "{{ ovn_network_connect_enable }}" - name: OVN_PRE_CONF_UDN_ADDR_ENABLE value: "{{ ovn_pre_conf_udn_addr_enable }}" - name: OVN_ROUTE_ADVERTISEMENTS_ENABLE diff --git a/dist/templates/rbac-ovnkube-cluster-manager.yaml.j2 b/dist/templates/rbac-ovnkube-cluster-manager.yaml.j2 index 44f7020165..8aecfc43ca 100644 --- a/dist/templates/rbac-ovnkube-cluster-manager.yaml.j2 +++ b/dist/templates/rbac-ovnkube-cluster-manager.yaml.j2 @@ -77,6 +77,7 @@ rules: - clusteruserdefinednetworks - routeadvertisements - networkqoses + - clusternetworkconnects verbs: [ "get", "list", "watch" ] - apiGroups: ["k8s.ovn.org"] resources: @@ -88,6 +89,7 @@ rules: - clusteruserdefinednetworks/status - clusteruserdefinednetworks/finalizers - routeadvertisements/status + - clusternetworkconnects # Unlike core kubernetes objects, for CRDs there is strict enforcement of status subresource permissions. Since CM has to annotate CNC we need to provide full object patch permissions. 
verbs: [ "patch", "update" ] - apiGroups: [""] resources: diff --git a/docs/api-reference/userdefinednetwork-api-spec.md b/docs/api-reference/userdefinednetwork-api-spec.md index 4c679ffc14..30fd2832d7 100644 --- a/docs/api-reference/userdefinednetwork-api-spec.md +++ b/docs/api-reference/userdefinednetwork-api-spec.md @@ -43,6 +43,7 @@ _Validation:_ _Appears in:_ - [DualStackCIDRs](#dualstackcidrs) +- [Layer2Config](#layer2config) - [Layer3Subnet](#layer3subnet) - [LocalnetConfig](#localnetconfig) @@ -137,6 +138,34 @@ _Appears in:_ +#### DualStackIPs + +_Underlying type:_ _[IP](#ip)_ + + + +_Validation:_ +- MaxItems: 2 +- MinItems: 1 + +_Appears in:_ +- [Layer2Config](#layer2config) + + + +#### IP + +_Underlying type:_ _string_ + + + + + +_Appears in:_ +- [DualStackIPs](#dualstackips) + + + #### IPAMConfig @@ -153,7 +182,7 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | | `mode` _[IPAMMode](#ipammode)_ | Mode controls how much of the IP configuration will be managed by OVN.
`Enabled` means OVN-Kubernetes will apply IP configuration to the SDN infrastructure and it will also assign IPs
from the selected subnet to the individual pods.
`Disabled` means OVN-Kubernetes will only assign MAC addresses and provide layer 2 communication, letting users
configure IP addresses for the pods.
`Disabled` is only available for Secondary networks.
By disabling IPAM, any Kubernetes features that rely on selecting pods by IP will no longer function
(such as network policy, services, etc). Additionally, IP port security will also be disabled for interfaces attached to this network.
Defaults to `Enabled`. | | Enum: [Enabled Disabled]
| -| `lifecycle` _[NetworkIPAMLifecycle](#networkipamlifecycle)_ | Lifecycle controls IP addresses management lifecycle.

The only allowed value is Persistent. When set, the IP addresses assigned by OVN-Kubernetes will be persisted in an
`ipamclaims.k8s.cni.cncf.io` object. These IP addresses will be reused by other pods if requested.
Only supported when mode is `Enabled`. | | Enum: [Persistent]
| +| `lifecycle` _[NetworkIPAMLifecycle](#networkipamlifecycle)_ | Lifecycle controls IP addresses management lifecycle.
The only allowed value is Persistent. When set, the IP addresses assigned by OVN-Kubernetes will be persisted in an
`ipamclaims.k8s.cni.cncf.io` object. These IP addresses will be reused by other pods if requested.
Only supported when mode is `Enabled`. | | Enum: [Persistent]
| #### IPAMMode @@ -188,10 +217,13 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | -| `role` _[NetworkRole](#networkrole)_ | Role describes the network role in the pod.

Allowed value is "Secondary".
Secondary network is only assigned to pods that use `k8s.v1.cni.cncf.io/networks` annotation to select given network. | | Enum: [Primary Secondary]
Required: \{\}
| +| `role` _[NetworkRole](#networkrole)_ | Role describes the network role in the pod.
Allowed value is "Secondary".
Secondary network is only assigned to pods that use `k8s.v1.cni.cncf.io/networks` annotation to select given network. | | Enum: [Primary Secondary]
Required: \{\}
| | `mtu` _integer_ | MTU is the maximum transmission unit for a network.
MTU is optional, if not provided, the globally configured value in OVN-Kubernetes (defaults to 1400) is used for the network. | | Maximum: 65536
Minimum: 576
| -| `subnets` _[DualStackCIDRs](#dualstackcidrs)_ | Subnets are used for the pod network across the cluster.
Dual-stack clusters may set 2 subnets (one for each IP family), otherwise only 1 subnet is allowed.

The format should match standard CIDR notation (for example, "10.128.0.0/16").
This field must be omitted if `ipam.mode` is `Disabled`. | | MaxItems: 2
MaxLength: 43
MinItems: 1
| -| `joinSubnets` _[DualStackCIDRs](#dualstackcidrs)_ | JoinSubnets are used inside the OVN network topology.

Dual-stack clusters may set 2 subnets (one for each IP family), otherwise only 1 subnet is allowed.
This field is only allowed for "Primary" network.
It is not recommended to set this field without explicit need and understanding of the OVN network topology.
When omitted, the platform will choose a reasonable default which is subject to change over time. | | MaxItems: 2
MaxLength: 43
MinItems: 1
| +| `subnets` _[DualStackCIDRs](#dualstackcidrs)_ | Subnets are used for the pod network across the cluster.
Dual-stack clusters may set 2 subnets (one for each IP family), otherwise only 1 subnet is allowed.
The format should match standard CIDR notation (for example, "10.128.0.0/16").
This field must be omitted if `ipam.mode` is `Disabled`. | | MaxItems: 2
MaxLength: 43
MinItems: 1
| +| `reservedSubnets` _[CIDR](#cidr) array_ | reservedSubnets specifies a list of CIDRs reserved for static IP assignment, excluded from automatic allocation.
reservedSubnets is optional. When omitted, all IP addresses in `subnets` are available for automatic assignment.
IPs from these ranges can still be requested through static IP assignment.
Each item should be in range of the specified CIDR(s) in `subnets`.
The maximum number of entries allowed is 25.
The format should match standard CIDR notation (for example, "10.128.0.0/16").
This field must be omitted if `subnets` is unset or `ipam.mode` is `Disabled`. | | MaxItems: 25
MaxLength: 43
MinItems: 1
| +| `infrastructureSubnets` _[CIDR](#cidr) array_ | infrastructureSubnets specifies a list of internal CIDR ranges that OVN-Kubernetes will reserve for internal network infrastructure.
Any IP addresses within these ranges cannot be assigned to workloads.
When omitted, OVN-Kubernetes will automatically allocate IP addresses from `subnets` for its infrastructure needs.
When there are not enough available IPs in the provided infrastructureSubnets, OVN-Kubernetes will automatically allocate IP addresses from subnets for its infrastructure needs.
When `reservedSubnets` is also specified the CIDRs cannot overlap.
When `defaultGatewayIPs` is also specified, the default gateway IPs must belong to one of the infrastructure subnet CIDRs.
Each item should be in range of the specified CIDR(s) in `subnets`.
The maximum number of entries allowed is 4.
The format should match standard CIDR notation (for example, "10.128.0.0/16").
This field must be omitted if `subnets` is unset or `ipam.mode` is `Disabled`. | | MaxItems: 4
MaxLength: 43
MinItems: 1
| +| `defaultGatewayIPs` _[DualStackIPs](#dualstackips)_ | defaultGatewayIPs specifies the default gateway IP used in the internal OVN topology.
Dual-stack clusters may set 2 IPs (one for each IP family), otherwise only 1 IP is allowed.
This field is only allowed for "Primary" network.
It is not recommended to set this field without explicit need and understanding of the OVN network topology.
When omitted, an IP from the subnets field is used. | | MaxItems: 2
MinItems: 1
| +| `joinSubnets` _[DualStackCIDRs](#dualstackcidrs)_ | JoinSubnets are used inside the OVN network topology.
Dual-stack clusters may set 2 subnets (one for each IP family), otherwise only 1 subnet is allowed.
This field is only allowed for "Primary" network.
It is not recommended to set this field without explicit need and understanding of the OVN network topology.
When omitted, the platform will choose a reasonable default which is subject to change over time. | | MaxItems: 2
MaxLength: 43
MinItems: 1
| | `ipam` _[IPAMConfig](#ipamconfig)_ | IPAM section contains IPAM-related configuration for the network. | | MinProperties: 1
| @@ -209,10 +241,10 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | -| `role` _[NetworkRole](#networkrole)_ | Role describes the network role in the pod.

Allowed values are "Primary" and "Secondary".
Primary network is automatically assigned to every pod created in the same namespace.
Secondary network is only assigned to pods that use `k8s.v1.cni.cncf.io/networks` annotation to select given network. | | Enum: [Primary Secondary]
Required: \{\}
| -| `mtu` _integer_ | MTU is the maximum transmission unit for a network.

MTU is optional, if not provided, the globally configured value in OVN-Kubernetes (defaults to 1400) is used for the network. | | Maximum: 65536
Minimum: 576
| -| `subnets` _[Layer3Subnet](#layer3subnet) array_ | Subnets are used for the pod network across the cluster.

Dual-stack clusters may set 2 subnets (one for each IP family), otherwise only 1 subnet is allowed.
Given subnet is split into smaller subnets for every node. | | MaxItems: 2
MinItems: 1
| -| `joinSubnets` _[DualStackCIDRs](#dualstackcidrs)_ | JoinSubnets are used inside the OVN network topology.

Dual-stack clusters may set 2 subnets (one for each IP family), otherwise only 1 subnet is allowed.
This field is only allowed for "Primary" network.
It is not recommended to set this field without explicit need and understanding of the OVN network topology.
When omitted, the platform will choose a reasonable default which is subject to change over time. | | MaxItems: 2
MaxLength: 43
MinItems: 1
| +| `role` _[NetworkRole](#networkrole)_ | Role describes the network role in the pod.
Allowed values are "Primary" and "Secondary".
Primary network is automatically assigned to every pod created in the same namespace.
Secondary network is only assigned to pods that use `k8s.v1.cni.cncf.io/networks` annotation to select given network. | | Enum: [Primary Secondary]
Required: \{\}
| +| `mtu` _integer_ | MTU is the maximum transmission unit for a network.
MTU is optional, if not provided, the globally configured value in OVN-Kubernetes (defaults to 1400) is used for the network. | | Maximum: 65536
Minimum: 576
| +| `subnets` _[Layer3Subnet](#layer3subnet) array_ | Subnets are used for the pod network across the cluster.
Dual-stack clusters may set 2 subnets (one for each IP family), otherwise only 1 subnet is allowed.
Given subnet is split into smaller subnets for every node. | | MaxItems: 2
MinItems: 1
| +| `joinSubnets` _[DualStackCIDRs](#dualstackcidrs)_ | JoinSubnets are used inside the OVN network topology.
Dual-stack clusters may set 2 subnets (one for each IP family), otherwise only 1 subnet is allowed.
This field is only allowed for "Primary" network.
It is not recommended to set this field without explicit need and understanding of the OVN network topology.
When omitted, the platform will choose a reasonable default which is subject to change over time. | | MaxItems: 2
MaxLength: 43
MinItems: 1
| #### Layer3Subnet @@ -229,7 +261,7 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | | `cidr` _[CIDR](#cidr)_ | CIDR specifies L3Subnet, which is split into smaller subnets for every node. | | MaxLength: 43
| -| `hostSubnet` _integer_ | HostSubnet specifies the subnet size for every node.

When not set, it will be assigned automatically. | | Maximum: 127
Minimum: 1
| +| `hostSubnet` _integer_ | HostSubnet specifies the subnet size for every node.
When not set, it will be assigned automatically. | | Maximum: 127
Minimum: 1
| #### LocalnetConfig @@ -303,10 +335,12 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | -| `topology` _[NetworkTopology](#networktopology)_ | Topology describes network configuration.

Allowed values are "Layer3", "Layer2" and "Localnet".
Layer3 topology creates a layer 2 segment per node, each with a different subnet. Layer 3 routing is used to interconnect node subnets.
Layer2 topology creates one logical switch shared by all nodes.
Localnet topology is based on layer 2 topology, but also allows connecting to an existent (configured) physical network to provide north-south traffic to the workloads. | | Enum: [Layer2 Layer3 Localnet]
Required: \{\}
| +| `topology` _[NetworkTopology](#networktopology)_ | Topology describes network configuration.
Allowed values are "Layer3", "Layer2" and "Localnet".
Layer3 topology creates a layer 2 segment per node, each with a different subnet. Layer 3 routing is used to interconnect node subnets.
Layer2 topology creates one logical switch shared by all nodes.
Localnet topology is based on layer 2 topology, but also allows connecting to an existent (configured) physical network to provide north-south traffic to the workloads. | | Enum: [Layer2 Layer3 Localnet]
Required: \{\}
| | `layer3` _[Layer3Config](#layer3config)_ | Layer3 is the Layer3 topology configuration. | | | | `layer2` _[Layer2Config](#layer2config)_ | Layer2 is the Layer2 topology configuration. | | | | `localnet` _[LocalnetConfig](#localnetconfig)_ | Localnet is the Localnet topology configuration. | | | +| `transport` _[TransportOption](#transportoption)_ | Transport describes the transport technology for pod-to-pod traffic.
Allowed values are "NoOverlay" and "Geneve".
- "NoOverlay": The network operates in no-overlay mode.
- "Geneve": The network uses Geneve overlay.
When omitted, the default behaviour is Geneve. | | Enum: [NoOverlay Geneve]
| +| `noOverlayOptions` _[NoOverlayOptions](#nooverlayoptions)_ | NoOverlayOptions contains configuration for no-overlay mode.
This is only allowed when Transport is "NoOverlay". | | | #### NetworkTopology @@ -328,6 +362,74 @@ _Appears in:_ | `Layer3` | | +#### NoOverlayOptions + + + +NoOverlayOptions contains configuration options for networks operating in no-overlay mode. + + + +_Appears in:_ +- [NetworkSpec](#networkspec) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `outboundSNAT` _[SNATOption](#snatoption)_ | OutboundSNAT defines the SNAT behavior for outbound traffic from pods. | | Enum: [Enabled Disabled]
| +| `routing` _[RoutingOption](#routingoption)_ | Routing specifies whether the pod network routing is managed by OVN-Kubernetes or users. | | Enum: [Managed Unmanaged]
| + + +#### RoutingOption + +_Underlying type:_ _string_ + + + + + +_Appears in:_ +- [NoOverlayOptions](#nooverlayoptions) + +| Field | Description | +| --- | --- | +| `Managed` | | +| `Unmanaged` | | + + +#### SNATOption + +_Underlying type:_ _string_ + + + + + +_Appears in:_ +- [NoOverlayOptions](#nooverlayoptions) + +| Field | Description | +| --- | --- | +| `Enabled` | | +| `Disabled` | | + + +#### TransportOption + +_Underlying type:_ _string_ + + + + + +_Appears in:_ +- [NetworkSpec](#networkspec) + +| Field | Description | +| --- | --- | +| `NoOverlay` | | +| `Geneve` | | + + #### UserDefinedNetwork @@ -379,7 +481,7 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | -| `topology` _[NetworkTopology](#networktopology)_ | Topology describes network configuration.

Allowed values are "Layer3", "Layer2".
Layer3 topology creates a layer 2 segment per node, each with a different subnet. Layer 3 routing is used to interconnect node subnets.
Layer2 topology creates one logical switch shared by all nodes. | | Enum: [Layer2 Layer3]
Required: \{\}
| +| `topology` _[NetworkTopology](#networktopology)_ | Topology describes network configuration.
Allowed values are "Layer3", "Layer2".
Layer3 topology creates a layer 2 segment per node, each with a different subnet. Layer 3 routing is used to interconnect node subnets.
Layer2 topology creates one logical switch shared by all nodes. | | Enum: [Layer2 Layer3]
Required: \{\}
| | `layer3` _[Layer3Config](#layer3config)_ | Layer3 is the Layer3 topology configuration. | | | | `layer2` _[Layer2Config](#layer2config)_ | Layer2 is the Layer2 topology configuration. | | | diff --git a/docs/features/bgp-integration/route-advertisements.md b/docs/features/bgp-integration/route-advertisements.md index 1787507375..0d8257521c 100644 --- a/docs/features/bgp-integration/route-advertisements.md +++ b/docs/features/bgp-integration/route-advertisements.md @@ -141,7 +141,7 @@ in shared gateway mode. > previous example must correspond to the remote BGP router's configuration > (router ID, AS number, accept routes, etc...), and vice versa. -### Import routes into a CUDN +### Import routes from the default VRF into a CUDN Assuming we have a CUDN: @@ -165,31 +165,25 @@ spec: hostSubnet: 24 ``` -This example is similar to the previous one with the exception that the BGP -peering session is configured to happen over VRF `extranet`: +After routes have been imported to the default VRF as in the previous example, +a typical scenario is to import those routes from the default VRF to a CUDN as +well. This can be achieved with: ```yaml apiVersion: frrk8s.metallb.io/v1beta1 kind: FRRConfiguration metadata: labels: - use-for-advertisements: extranet - name: receive-filtered-extranet + use-for-advertisements: default + name: import-extranet namespace: frr-k8s-system spec: nodeSelector: {} bgp: routers: - asn: 64512 - neighbors: - - address: 192.168.221.3 - asn: 64512 - disableMP: true - toReceive: - allowed: - mode: filtered - prefixes: - - prefix: 172.20.0.0/16 + imports: + - vrf: default vrf: extranet ``` @@ -205,27 +199,6 @@ traffic of the pods on that network. > pseudo-randomly generated and not easy to predict. Future enhancements will > allow for the VRF name to be configurable. -A typical scenario is to import installed BGP routes from the default VRF to a -CUDN. This can be achieved with: - -```yaml -apiVersion: frrk8s.metallb.io/v1beta1 -kind: FRRConfiguration -metadata: - labels: - use-for-advertisements: default - name: import-extranet - namespace: frr-k8s-system -spec: - nodeSelector: {} - bgp: - routers: - - asn: 64512 - imports: - - vrf: default - vrf: extranet -``` - > [!NOTE] > If you export routes for a CUDN over the default VRF as detailed on the next > sections, installed BGP routes in the default VRF are imported to the CUDN @@ -342,10 +315,42 @@ spec: advertise: true ``` -### Export routes to a CUDN over the network VRF (VRF-Lite) +### Import and export routes to a CUDN over the network VRF (VRF-Lite) + +It is also possible to import and export routes to a CUDN over a BGP session +established over that network's VRF without involving the default VRF at all. -It is also possible to export routes to a CUDN over a BGP session established -over that network's VRF: +To import, we define the proper `FRRConfiguration` first. 
This example is +similar to how routes are imported for the default pod network with the +exception that the BGP peering session is configured to happen over the CUDN VRF +`extranet`: + +```yaml +apiVersion: frrk8s.metallb.io/v1beta1 +kind: FRRConfiguration +metadata: + labels: + use-for-advertisements: extranet + name: receive-filtered-extranet + namespace: frr-k8s-system +spec: + nodeSelector: {} + bgp: + routers: + - asn: 64512 + neighbors: + - address: 192.168.221.3 + asn: 64512 + disableMP: true + toReceive: + allowed: + mode: filtered + prefixes: + - prefix: 172.20.0.0/16 + vrf: extranet +``` + +Then we define the `RouteAdvertisements` to export: ```yaml apiVersion: k8s.ovn.org/v1 @@ -379,11 +384,11 @@ BGP router could map this isolated traffic to an EVPN achieving a similar use case as if EVPN were to be supported directly. > [!NOTE] -> For the BGP session to be actually established over that network's -> VRF, at least one interface with proper IP configuration needs to be attached -> to the network's VRF. The resulting network egress traffic will be routed -> through that interface. OVN-Kubernetes does not manage this interface nor its -> attachment to the network's VRF. +> For the BGP session to be actually established over that network's VRF, at +> least one interface with proper IP configuration needs to be attached to the +> network's VRF. The CUDN egress traffic matching the learned routes will be +> routed through that interface. OVN-Kubernetes does not manage this interface +> nor its attachment to the network's VRF. > [!NOTE] > This configuration is only supported in local gateway mode. @@ -401,14 +406,14 @@ and will egress the cluster towards the provider network; and if the provider network is able to route it back to the cluster by virtue of learned BGP routes, the traffic will still be dropped to upkeep the CUDN isolation promise. -In the future, different alternatives will be provided to allow interconnecting -user defined networks. +OVN-Kubernetes relaxes the default advertised UDN isolation behavior when the +configuration flag `advertised-udn-isolation-mode` is set to `loose`. In this +configuration, traffic addressing the subnet of a different CUDN will egress the +cluster towards the provider network as before but, if routed back towards the +cluster, connectivity will be allowed in this case. ## Implementation Details -> [!NOTE] -> This section is work in progress. - ### Overview ```mermaid @@ -598,14 +603,79 @@ advertised. Usually N/S egress traffic from a pod is SNATed to the node IP. This does not happen when the network is advertised. In that case the traffic egresses the -cluster with the pod IP as source. For shared gateway mode this is handled with -a conditional SNAT on the OVN configuration for the network which ensures that -E/W egress traffic continues to be SNATed. Egress IP SNAT is unaffected. +cluster with the pod IP as source. In shared gateway mode this is handled with a +conditional SNAT on the gateway routers OVN configuration for the network which +ensures that E/W egress traffic (right now, only pod-to-node traffic) continues +to be SNATed. + +```shell +❯ kubectl exec -n ovn-kubernetes ovnkube-node-vkmkt -c ovnkube-controller -- ovn-nbctl list nat +... 
+_uuid : 7855a3a5-412c-4083-963c-b11aa80b7784 +allowed_ext_ips : [] +exempted_ext_ips : [] +external_ids : {} +external_ip : "172.18.0.2" +external_mac : [] +external_port_range : "32768-60999" +gateway_port : [] +logical_ip : "10.244.1.3" +logical_port : [] +match : "ip4.dst == $a712973235162149816" # added condition matching E/W traffic when advertised +options : {stateless="false"} +priority : 0 +type : snat + +... + +_uuid : 7be1b70b-88c7-4482-85ff-487663be9eda +addresses : ["172.18.0.2", "172.18.0.3", "172.18.0.4", "172.19.0.2", "172.19.0.3", "172.19.0.4"] +external_ids : {ip-family=v4, "k8s.ovn.org/id"="default-network-controller:EgressIP:node-ips:v4:default", "k8s.ovn.org/name"=node-ips, "k8s.ovn.org/owner-controller"=default-network-controller, "k8s.ovn.org/owner-type"=EgressIP, network=default} +name : a712973235162149816 +... +``` + +For CUDNs in local gateway mode, this is handled on a similar way with a +conditional SNAT to the network's masquerade IP which would then finally be +SNATed to the node IP on the host. + +```shell +❯ kubectl exec -n ovn-kubernetes ovnkube-node-vkmkt -c ovnkube-controller -- ovn-nbctl list nat +... +_uuid : 61b26442-fa08-4aa8-b326-97afb71edab1 +allowed_ext_ips : [] +exempted_ext_ips : [] +external_ids : {"k8s.ovn.org/network"=cluster_udn_udn-l2, "k8s.ovn.org/topology"=layer2} +external_ip : "169.254.0.11" +external_mac : [] +external_port_range : "32768-60999" +gateway_port : [] +logical_ip : "22.100.0.0/16" +logical_port : [] +match : "ip4.dst == $a712973235162149816" +options : {stateless="false"} +priority : 0 +type : snat +... +``` + +Egress IP SNAT is unaffected. #### Route import When BGP routes get installed in a node's routing table, OVN-Kubernetes -synchronizes them to the gateway router of the corresponding OVN network. +synchronizes them to the gateway router of the corresponding OVN network making +them available for egress in shared gateway mode. + +```shell +❯ kubectl exec -n ovn-kubernetes ovnkube-node-vkmkt -c ovnkube-controller -- ovn-nbctl lr-route-list 076a4cba-c680-4fa3-ae2f-1ce7e0a1e153 +IPv4 Routes +Route Table
: + 169.254.0.0/17 169.254.0.4 dst-ip rtoe-GR_ovn-worker2 + 10.244.0.0/16 100.64.0.1 dst-ip + 172.26.0.0/16 172.18.0.5 dst-ip rtoe-GR_ovn-worker2 # learned route synced from host VRF + 0.0.0.0/0 172.18.0.1 dst-ip rtoe-GR_ovn-worker2 +``` ### Host network controllers: impacts on host networking stack @@ -616,23 +686,313 @@ advertised pod networks. This traffic is forwarded to the corresponding patch port of the network and is then handled by OVN with no extra changes required in shared gateway mode. +```shell +❯ kubectl exec -n ovn-kubernetes ovnkube-node-q76br -c ovnkube-controller -- ovs-ofctl dump-flows breth0 +... + # flows forwarding pod networks to the corresponding patch ports + cookie=0xdeff105, duration=445.802s, table=0, n_packets=0, n_bytes=0, idle_age=445, priority=300,ip,in_port=1,nw_dst=10.244.0.0/24 actions=output:2 + cookie=0xdeff105, duration=300.323s, table=0, n_packets=0, n_bytes=0, idle_age=300, priority=300,ip,in_port=1,nw_dst=22.100.0.0/16 actions=output:3 +``` + In local gateway mode, the traffic is forwarded to the host networking stack -where it is handled with no further configuration changes required. +from where it is routed to the network management port. + +```shell +❯ kubectl exec -n ovn-kubernetes ovnkube-node-vkmkt -c ovnkube-controller -- ovs-ofctl dump-flows breth0 + ... + # flows forwarding pod networks to host + cookie=0xdeff105, duration=57.620s, table=0, n_packets=0, n_bytes=0, idle_age=57, priority=300,ip,in_port=1,nw_dst=22.100.0.0/16 actions=LOCAL + cookie=0xdeff105, duration=9589.541s, table=0, n_packets=0, n_bytes=0, idle_age=9706, priority=300,ip,in_port=1,nw_dst=10.244.1.0/24 actions=LOCAL + ... + +❯ kubectl exec -n ovn-kubernetes ovnkube-node-vkmkt -c ovnkube-controller -- ip route +... +# routing to the default pod network management port +10.244.0.0/16 via 10.244.1.1 dev ovn-k8s-mp0 +10.244.1.0/24 dev ovn-k8s-mp0 proto kernel scope link src 10.244.1.2 +... + +❯ kubectl exec -n ovn-kubernetes ovnkube-node-vkmkt -c ovnkube-controller -- ip rule +... +# for a CUDN, an ip rule takes care of routing on the correct VRF +2000: from all to 22.100.0.0/16 lookup 1010 +... + +❯ kubectl exec -n ovn-kubernetes ovnkube-node-vkmkt -c ovnkube-controller -- ip r show table 1010 +... +# also routing to the CUDN management port +22.100.0.0/16 dev ovn-k8s-mp1 proto kernel scope link src 22.100.0.2 +... +``` #### Host SNAT behavior with BGP Advertisement In the same way that was done for the OVN configuration, the host networking stack configuration is updated to inhibit the SNAT for N/S traffic. +```shell +❯ kubectl exec -n ovn-kubernetes ovnkube-node-vkmkt -c ovnkube-controller -- nft list ruleset +... + set remote-node-ips-v4 { + type ipv4_addr + comment "Block egress ICMP needs frag to remote Kubernetes nodes" + elements = { 172.18.0.3, 172.18.0.4, + 172.19.0.2, 172.19.0.4 } + } +... + chain ovn-kube-pod-subnet-masq { + # ip daddr condition added if default pod network advertised + ip saddr 10.244.1.0/24 ip daddr @remote-node-ips-v4 masquerade # ip daddr condition if advertised + } +... +``` + #### VRF-Lite isolation To ensure isolation in VRF-Lite configurations, the default route pointing to the default VRF gateway present on the network's VRF is inhibited. Thus only BGP installed routes will be used for N/S traffic. +```shell +❯ kubectl exec -n ovn-kubernetes ovnkube-node-vkmkt -c ovnkube-controller -- ip r show table 1010 +# default match unreachable +unreachable default metric 4278198272 +... 
+# installed route going through interface attached to VRF +172.26.0.0/16 nhid 28 via 172.19.0.5 dev eth1 proto bgp metric 20 +``` + +#### CUDN isolation + +To ensure CUDN isolation in local gateway mode filtering rules are added to the host configuration + +```shell +❯ kubectl exec -n ovn-kubernetes ovnkube-node-vkmkt -c ovnkube-controller -- nft list ruleset +... + set advertised-udn-subnets-v4 { + type ipv4_addr + flags interval + comment "advertised UDN V4 subnets" + elements = { 22.100.0.0/16 comment "cluster_udn_udn-l2" } + } +... + chain udn-bgp-drop { + comment "Drop traffic generated locally towards advertised UDN subnets" + type filter hook output priority filter; policy accept; + ct state new ip daddr @advertised-udn-subnets-v4 counter packets 0 bytes 0 drop + ct state new ip6 daddr @advertised-udn-subnets-v6 counter packets 0 bytes 0 drop + } +... +``` + +These rules are inhibited if OVN-Kubernetes is configured in "loose advertised +UDN isolation mode". + ## Troubleshooting -TBD +### Troubleshooting RouteAdvertisements + +Check `RouteAdvertisement` status for configuration errors: + +```shell +❯ kubectl get ra +NAME STATUS +default Accepted +extranet Not Accepted: configuration pending: no networks selected +``` + +Check that `FRRConfiguration` have been generated as expected: + +```shell +❯ kubectl get frrconfiguration -n frr-k8s-system +NAME AGE +ovnk-generated-66plb 14m +ovnk-generated-fxncs 13m +ovnk-generated-grdfg 14m +ovnk-generated-qhz9b 14m +ovnk-generated-sgphk 13m +ovnk-generated-vtwpv 13m +receive-all 14m +``` + +Expected `FRRConfiguration` are: +- Any manual configuration done to import routes +- MetalLB generated FRRConfiguration if in use +- One of ovnk-generated-XXXXX configuration per RouteAdvertisement and selected FRRConfiguration/Node combination + +### Troubleshooting FRR-K8s + +FRR-K8s merges all FRRConfiguration into a single FRR configuration for each +node. The status of generating that configuration and applying it to FRR daemon +running on each node is relayed through `FRRNodeStates`: + +```shell +❯ kubectl get -n frr-k8s-system frrnodestates +NAME AGE +ovn-control-plane 16m +ovn-worker 16m +ovn-worker2 16m + +$ oc describe -n openshift-frr-k8s frrnodestates worker-0.ostest.test.metalkube.org +Name: worker-0.ostest.test.metalkube.org +Namespace: +Labels: +Annotations: +API Version: frrk8s.metallb.io/v1beta1 +Kind: FRRNodeState +Metadata: + Creation Timestamp: 2025-09-10T11:29:44Z + Generation: 1 + Resource Version: 52036 + UID: 34f67799-9642-40a3-a378-67ca3ad5dfd2 +Spec: +Status: + Last Conversion Result: success # whether FRRConfiguration merge and conversion to FRR config was successful + Last Reload Result: success # whether resulting FRR config was applied correctly + Running Config: + # the FRR running config is displayed here +... 
+``` + +FRR-K8s provides metrics: + +```text + Namespace = "frrk8s" + Subsystem = "bgp" + + SessionUp = metric{ + Name: "session_up", + Help: "BGP session state (1 is up, 0 is down)", + } + + UpdatesSent = metric{ + Name: "updates_total", + Help: "Number of BGP UPDATE messages sent", + } + + Prefixes = metric{ + Name: "announced_prefixes_total", + Help: "Number of prefixes currently being advertised on the BGP session", + } + + ReceivedPrefixes = metric{ + Name: "received_prefixes_total", + Help: "Number of prefixes currently being received on the BGP session", + } +``` + +### Troubleshooting FRR + +FRR is deployed by FRR-K8s as a daemonset and runs on every node: + +```shell +❯ kubectl get pods -n frr-k8s-system -o wide +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +frr-k8s-daemon-5cqbq 6/6 Running 0 22m 172.18.0.4 ovn-worker2 +frr-k8s-daemon-6hmzb 6/6 Running 0 22m 172.18.0.3 ovn-worker +frr-k8s-daemon-gsmml 6/6 Running 0 22m 172.18.0.2 ovn-control-plane +... +``` + +Different aspects of the running daemons can be checked through the `vtysh` CLI. +Some examples are: + +- The running configuration: + +```shell +$ kubectl exec -ti -n frr-k8s-system frr-k8s-daemon-5cqbq -c frr -- vtysh -c "show running-conf" +Building configuration... + +Current configuration: +! +frr version 8.5.3 +frr defaults traditional +hostname ovn-worker2 +log file /etc/frr/frr.log informational +log timestamp precision 3 +no ip forwarding +service integrated-vtysh-config +! +router bgp 64512 + no bgp ebgp-requires-policy + no bgp hard-administrative-reset +... +``` + +- The BGP session states: + +```shell +❯ kubectl exec -ti -n frr-k8s-system frr-k8s-daemon-5cqbq -c frr -- vtysh -c "show bgp neighbor 172.18.0.5" +BGP neighbor is 172.18.0.5, remote AS 64512, local AS 64512, internal link + … +Hostname: 78d5a0f1d3cd + BGP version 4, remote router ID 172.18.0.5, local router ID 172.18.0.4 + BGP state = Established, up for 00:01:29 + ... + Last reset 00:03:30, Peer closed the session +... +``` + +- The actual routes exchanged through BGP: + +```shell +❯ kubectl exec -ti -n frr-k8s-system frr-k8s-daemon-5cqbq -c frr -- vtysh -c "show bgp ipv4" +BGP table version is 2, local router ID is 172.18.0.4, vrf id 0 +Default local pref 100, local AS 64512 +Status codes: s suppressed, d damped, h history, * valid, > best, = multipath, + i internal, r RIB-failure, S Stale, R Removed +Nexthop codes: @NNN nexthop's vrf id, < announce-nh-self +Origin codes: i - IGP, e - EGP, ? - incomplete +RPKI validation codes: V valid, I invalid, N Not found + + Network Next Hop Metric LocPrf Weight Path + *> 10.244.0.0/24 0.0.0.0 0 32768 i + *> 22.100.0.0/16 0.0.0.0 0 32768 i + *>i172.26.0.0/16 172.18.0.5 0 100 0 i + + +Displayed 2 routes and 2 total paths +``` + +- Routes installed on the host and their origin: + +```shell +❯ kubectl exec -ti -n frr-k8s-system frr-k8s-daemon-5cqbq -c frr -- vtysh -c "show ip route" +Codes: K - kernel route, C - connected, S - static, R - RIP, + O - OSPF, I - IS-IS, B - BGP, E - EIGRP, N - NHRP, + T - Table, v - VNC, V - VNC-Direct, F - PBR, + f - OpenFabric, + > - selected route, * - FIB route, q - queued, r - rejected, b - backup + t - trapped, o - offload failure + +... +B>* 172.26.0.0/16 [200/0] via 172.18.0.5, breth0, weight 1, 00:41:11 +... 
+``` + +Most of these commands have variations to check the same information specific to +a VRF: + +```shell +❯ kubectl exec -ti -n frr-k8s-system frr-k8s-daemon-gv76r -c frr -- vtysh -c "show ip route vrf udn-l2" +Codes: K - kernel route, C - connected, S - static, R - RIP, + O - OSPF, I - IS-IS, B - BGP, E - EIGRP, N - NHRP, + T - Table, v - VNC, V - VNC-Direct, A - Babel, F - PBR, + f - OpenFabric, + > - selected route, * - FIB route, q - queued, r - rejected, b - backup + t - trapped, o - offload failure + +VRF udn-l2: +... +B>* 172.26.0.0/16 [200/0] via 172.18.0.5, breth0 (vrf default), weight 1, 01:39:55 +... +``` + +### Troubleshooting dataplane + +FRR applies its configuration to the host networking stack in the form of +routes. Thus standard tooling can be used for dataplane troubleshooting: +connectivity checks, tcpdump, ovn-trace, ovs-trace, ... ## Best Practices diff --git a/docs/features/live-migration.md b/docs/features/live-migration.md index adcd2a4f47..c40822e069 100644 --- a/docs/features/live-migration.md +++ b/docs/features/live-migration.md @@ -123,21 +123,16 @@ Login and check that the VM has receive a proper address `virtctl console fedora Also we can check the neighbours cache to verify it later on ```bash [fedora@fedora ~]arp -a -_gateway (169.254.1.1) at 0a:58:a9:fe:01:01 [ether] on eth0 +_gateway (10.244.2.1) at 0a:58:a9:fe:01:01 [ether] on eth0 ``` - -Keep in mind the default gw is a link local address; that is because -the live migration feature is implemented using ARP proxy. - -The last route is needed since the link local address subnet is not bound to any interface, that -route is automatically created by dhcp client. +The default gateway is the pod network subnet gateway IP. The ARP proxy feature +ensures the gateway MAC address remains consistent across live migrations. ```bash [fedora@fedora ~]ip route -default via 169.254.1.1 dev eth0 proto dhcp src 10.244.2.26 metric 100 +default via 10.244.2.1 dev eth0 proto dhcp src 10.244.2.26 metric 100 10.244.2.0/24 dev eth0 proto kernel scope link src 10.244.2.26 metric 100 -169.254.1.1 dev eth0 proto dhcp scope link src 10.244.2.26 metric 100 ``` Then a live migration can be initialized with `virtctl migrate fedora` and wait @@ -171,7 +166,7 @@ default fedora 16m Running 10.244.2.26 ovn-worker True inet6 fe80::32d2:10d4:f5ed:3064/64 scope link noprefixroute valid_lft forever preferred_lft forever [fedora@fedora ~]arp -a -_gateway (169.254.1.1) at 0a:58:a9:fe:01:01 [ether] on eth0 +_gateway (10.244.2.1) at 0a:58:a9:fe:01:01 [ether] on eth0 ``` ### Configuring dns server @@ -264,9 +259,9 @@ Benefit of the bridge binding is that is able to expose the pod IP to the VM as ### OVN-Kubernetes Implementation Details To implement live migration ovn-kubernetes do the following: -- Send DHCP replies advertising the allocated IP address to the guest VM (via OVN-Kubernetes DHCP options configured for the logical switch ports). +- Send DHCP replies advertising the allocated IP address and subnet gateway to the guest VM (via OVN-Kubernetes DHCP options configured for the logical switch ports). 
- A point to point routing is used so one node's subnet IP can be routed from different node -- The VM's gateway IP and MAC are independent of the node they are running on using proxy arp +- The VM's gateway IP (subnet gateway) and MAC are kept consistent across nodes using ARP proxy **Point to point routing:** @@ -330,16 +325,16 @@ Remote zone: **Nodes logical switch ports:** -To have a consistent gateway at VMs (keep ip and mac after live migration) -the "arp_proxy" feature is used and it need to be activated at -the logical switch port of type router connects node's logical switch to +To have a consistent gateway at VMs (keep ip and mac after live migration), +the "arp_proxy" feature is used and it needs to be activated at +the logical switch port of type router that connects the node's logical switch to ovn_cluster_router logical router. The "arp_proxy" LSP option will include the MAC to answer ARPs with, and the -link local ipv4 and ipv6 to answer for and the cluster wide pod CIDR to answer -to pod subnets when the node switch do not have the live migrated ip. The -flows from arp_proxy has less priority than the ones from the node logical -switch so ARP flows are not overriden. +link local ipv4 and ipv6 addresses, as well as the cluster wide pod CIDR. This +allows the proxy to answer ARP requests for the subnet gateway IP when the node +switch does not have the live migrated IP. The flows from arp_proxy have lower +priority than the ones from the node logical switch so ARP flows are not overridden. ```text ┌────────────────────┐ ┌────────────────────┐ @@ -381,7 +376,7 @@ Also the DHCP options will be configured to deliver the address to the VMs ┌─────────────────────────────────┐ │ dhcp-options 1234 │ │ lease_time: 3500 │ -│ router: 169.254.1.1 │ +│ router: 10.244.0.1 │ │ dns_server: [kubedns] │ │ server_id: 169.254.1.1 │ │ server_mac: c0:ff:ee:00:00:01 │ diff --git a/docs/installation/launching-ovn-kubernetes-with-helm.md b/docs/installation/launching-ovn-kubernetes-with-helm.md index c25c107af5..6da4a3b082 100644 --- a/docs/installation/launching-ovn-kubernetes-with-helm.md +++ b/docs/installation/launching-ovn-kubernetes-with-helm.md @@ -120,15 +120,6 @@ false Controls if forwarding is allowed on OVNK controlled interfaces - - global.disableIfaceIdVer - bool -
-false
-
- - Deprecated: iface-id-ver is always enabled - global.disablePacketMtuCheck string diff --git a/docs/okeps/images/hybrid-ip-vrf-evpn.png b/docs/okeps/images/hybrid-ip-vrf-evpn.png new file mode 100644 index 0000000000..b96253bcde Binary files /dev/null and b/docs/okeps/images/hybrid-ip-vrf-evpn.png differ diff --git a/docs/okeps/images/l2-evpn-vm-migration.png b/docs/okeps/images/l2-evpn-vm-migration.png new file mode 100644 index 0000000000..71ab415f37 Binary files /dev/null and b/docs/okeps/images/l2-evpn-vm-migration.png differ diff --git a/docs/okeps/images/l2evpn.png b/docs/okeps/images/l2evpn.png new file mode 100644 index 0000000000..e8a7dbc5f5 Binary files /dev/null and b/docs/okeps/images/l2evpn.png differ diff --git a/docs/okeps/images/l3evpn.png b/docs/okeps/images/l3evpn.png new file mode 100644 index 0000000000..ceb2e75a78 Binary files /dev/null and b/docs/okeps/images/l3evpn.png differ diff --git a/docs/okeps/okep-4368-template.md b/docs/okeps/okep-4368-template.md index 0f14591dd1..ba590d1c29 100644 --- a/docs/okeps/okep-4368-template.md +++ b/docs/okeps/okep-4368-template.md @@ -100,6 +100,12 @@ which version is this feature planned to be introduced in? check repo milestones/releases to get this information for when the next release is planned for +## Backwards Compatibility + +(Describe any backwards compatibility considerations for this feature. +This should include any changes to the API, datapath, or other components +that may be backwards incompatible.) + ## Alternatives (List other design alternatives and why we did not go in that @@ -108,4 +114,4 @@ direction) ## References (Add any additional document links. Again, we should try to avoid -too much content not in version control to avoid broken links) \ No newline at end of file +too much content not in version control to avoid broken links) diff --git a/docs/okeps/okep-5088-evpn.md b/docs/okeps/okep-5088-evpn.md new file mode 100644 index 0000000000..e73ba41acf --- /dev/null +++ b/docs/okeps/okep-5088-evpn.md @@ -0,0 +1,890 @@ +# OKEP-5088: EVPN Support + +* Issue: [#5088](https://github.com/ovn-org/ovn-kubernetes/issues/5088) + +## Problem Statement + +The purpose of this enhancement is to add support for EVPN within the OVN-Kubernetes SDN, specifically with BGP. This +effort will allow exposing User Defined Networks (UDNs) externally via a VPN to other entities either +inside, or outside the cluster. BGP+EVPN is a common and native networking standard that will enable integration into +a user's networks without SDN specific network protocol integration, and provide an industry standardized way to achieve +network segmentation between sites. + +## Goals + +* To provide a user facing API to allow configuration of EVPN on Kubernetes worker nodes to integrate with a provider's + EVPN fabric. +* EVPN support will be provided for Layer 2 (MAC-VRF) or Layer 3 (IP-VRF) OVN-Kubernetes Primary User Defined Network + types. +* EVPN Multi-Homing + Mass Withdrawal support, including BFD support for link detection. +* FRR providing EVPN connectivity via BGP and acting as the Kubernetes worker node PE router. +* Support for EVPN in local gateway mode only. +* Support for EVPN in on-prem deployments only. + +## Non-Goals + +* Providing support for any other virtual router as a PE router. +* Asymmetric Integrated Routing and Bridging (IRB) with EVPN. +* Supporting EVPN via the Cluster Default Network (CDN). + +## Future-Goals + +* Support for EVPN in shared gateway mode once there is OVN support. 
+* Potentially advertising service Cluster IPs. +* Cloud platform BGP/EVPN enablement. +* Providing EVPN support for Secondary User Defined Network types. +* Specifying a VXLAN port other than 4789 for EVPN. +* Support for interconnecting two Kubernetes clusters with EVPN and then allowing VM migration across them. + +## Introduction + +The [BGP enhancement](./okep-5296-bgp.md) has been implemented in OVN-Kubernetes, which allows a user to expose pods and +other internal Kubernetes network entities outside the cluster with dynamic routing. + +Additionally, the User Defined Network (UDN) feature has +brought the capability for a user to be able to create per tenant networks. Combining these features today allows a user +to either: +- BGP advertise the Cluster Default Network (CDN) as well as leak non-IP-overlapping UDNs into default VRF. +- Expose UDNs via BGP peering over different network interfaces on an OCP node, allowing a VPN to be terminated on the + next hop PE router, and preserved into the OCP node. Also known in the networking industry as VRF-Lite. + +While VRF-Lite allows for a UDN to be carried via a VPN to external networks, it is cumbersome to configure and requires +an interface per UDN to be available on the host. By leveraging EVPN, these limitations no longer exist and all UDNs can +traverse the same host interface, segregated by VXLAN. Furthermore, when exposing UDNs via BGP today there is a limitation +that these networks are advertised as an L3 segment, even with VRF-Lite. With EVPN, we can now stretch the L2 UDN segment +across the external network fabric. +Finally, EVPN is a common datacenter networking fabric that many users with Kubernetes clusters already +rely on for their top of rack (TOR) network connectivity. It is a natural next step to enable the Kubernetes platform +to integrate directly with this fabric. + +### EVPN/UDN Background + +This section provides some background on UDN and EVPN. It is important to provide this context for a better understanding of +the following sections and design choices. + +UDNs are separate and potentially overlapping networks that are naturally isolated from each other. Each Primary UDN maps +to a corresponding VRF in Linux. This is true for both Layer 2 and Layer 3 networks. The ovn-k8s-mpx interface exists +per UDN and is configured inside of this Linux VRF. + +EVPN provides a mechanism for carrying potentially overlapping Layer 2 and/or Layer 3 networks while keeping them +isolated from each other in a Virtual Private Network (VPN) over an ethernet fabric via BGP extensions and an overlay +mechanism, typically VXLAN. + +Therefore, it is a logical next step to leverage EVPN to carry PUDNs as it fits the paradigm well. However, there are key +design choices to be aware of. With EVPN a Layer 2 network (MAC-VRF) can be thought of as simply a switch extended across +the ethernet fabric and used for East/West traffic. +With EVPN a Layer 2 network can belong to a MAC-VRF as well as an IP-VRF (Layer 3) network simultaneously. +In practice, this means the Layer 2 switch has a Switch Virtual Interface (SVI) which is plugged into a router (VRF), and +clients on the Layer 2 network use this SVI as their default gateway for routing. In other words, if you have a pod connected +to a switch (MAC-VRF that spans all nodes) it can talk to other pods or entities on that Layer 2 network without leaving the +MAC-VRF. 
On the other hand, if this pod needs to talk to the internet, it will go via its default gateway (SVI) and then be routed +via the IP-VRF. With EVPN, multiple Layer 2 networks can map to a single IP-VRF. + +Let's now look at how a Layer 2 UDN works in practice. For East/West it uses Geneve to ensure MAC-VRF like functionality. +For North/South by default we leak the Layer 2 into the default VRF (CDN) and use masquerading to avoid collisions with +other overlapping Layer 2 UDNs. If BGP is enabled, this Layer 2 UDN may be advertised directly (without masquerading) in +the default VRF. Neither of these options provides IP-VRF like isolation; in both cases the network is leaked into the default +VRF. When leaking into the default VRF, multiple Layer 2 UDNs may not overlap in their subnet range; each subnet must be +unique. +Another option is VRF-Lite, where a user can connect a dedicated NIC (think VLAN interface) to the VRF provisioned +in the Linux host for the Layer 2 UDN. With this model, a Layer 3 VPN is created, but it relies on manual configuration +on the host, as well as configuration on the upstream PE router. Now in order to route Northbound, the pod sends the packet +to its default gateway, the transit router, which will either route towards ovn-k8s-mpx (local gw mode) or the gateway +router (GR). Note, VRF-Lite only works for Layer 3 type VPNs, and does not provide a Layer 2 type of VPN technology to +handle VM live migration use cases. + +For Layer 3 networks, in OVN-Kubernetes we break up a supernet into a per-node subnet for the PUDN. Whenever a pod on +a node wants to talk to a pod on another node or anything externally, pure routing is used. This is the equivalent of an +IP-VRF. + +Furthermore, so far we have talked about carrying VPN for Layer 2 and Layer 3 PUDNs. What about the Cluster Default Network (CDN)? +This network lives in the default VRF. Typically, the default VRF is not carried over the EVPN fabric as a VPN. For +the purpose of this enhancement it will be considered not to be part of an EVPN. The CDN may still use Geneve, or it may +rely on the upcoming no-overlay mode to use a pure BGP routing scheme. + +As far as underlying technologies go, we use FRR as our BGP provider. As EVPN relies on BGP, we will continue to use FRR +for EVPN support. + +### Transitioning to EVPN from Geneve + +For Layer 2 and Layer 3 PUDNs Geneve is no longer used when enabling EVPN. Instead, VXLAN is used as an overlay to carry +per network VPN packets across the EVPN fabric. In practice this means we still need a switch domain that crosses all nodes +for Layer 2, and we need per node routing for Layer 3. FRR integrates directly with Linux to provide EVPN support. It +uses netdevs like Linux Bridges, VXLAN VTEPs, and VRFs in order to map Linux networks to advertised EVPN VNIs. + +We know that we create a VRF per Layer 2/Layer 3 UDN, which gives us our mapping to an EVPN VRF in Linux that FRR will +leverage. Therefore, we can conclude that every Layer 2 and Layer 3 UDN maps to an IP-VRF already. This implicit design +choice precludes us from taking several Layer 2 PUDNs and mapping them to a single IP-VRF. In other words, each MAC-VRF +in OVN-Kubernetes will map 1 to 1 with an IP-VRF. In practice, the consequence of this is a user is unable to connect +multiple Layer 2 PUDNs to the same "router" and allow routing between them. However, as we will see later, this can be +provided by other forms of route leaking (route target importing/exporting) between IP-VRFs. 
Nonetheless, it is key to +point out this implicit design choice and why it exists. It also provides implicit Network Isolation which was one of the +key tenets of the UDN design in the first place. So one could view this as intended behavior. + +Additionally, we have the ovn-k8s-mpx interface, which gives us a way to route traffic for Layer 2 networks into the +VRF. This interface can be viewed as our SVI. It is a way that we can get the packet to the VRF (IP-VRF) to be routed. +Note the SVI can be the same on every single node, since we are using symmetrical Integrated Routing and Bridging (IRB). +The SVI IP will not be advertised via BGP, and is only used for local pod routing. + +When we consider non-routed traffic for Layer 2 (just MAC-VRF traffic), we need a way for Layer 2 destined packets to get +VXLAN encapsulated and sent out with the right VNI. As previously mentioned, FRR relies on a Linux Bridge to serve as the +Layer 2 switch, which is connected to a VXLAN VTEP. With OVN, we obviously use a logical_switch that exists as flows in +br-int. In order to integrate with FRR/EVPN for local gateway mode, we will need to connect a port from the logical_switch +to the Linux Bridge. More technical details on that follow in the sections ahead. + +## User-Stories/Use-Cases + +The user stories will be broken down into more detail in the subsections below. The main use cases include: +* As a user, I want to connect my Kubernetes cluster to VMs or physical hosts on an external network. I want tenant + pods/VMs inside my Kubernetes cluster to be able to only communicate with certain network segments on this external + network. +* As a user, I want to be able to live migrate VMs from my external network onto the Kubernetes platform. +* As a user, my data center where I run Kubernetes is already using EVPN today. I want to eliminate the use of Geneve + which causes double encapsulation (VXLAN and Geneve), and integrate natively with my networking fabric. +* As a user, I want to create overlapping IP address space UDNs, and then connect them to different external networks + while preserving network isolation. + +### Extending UDNs into the provider network via EVPN + +This use case is about connecting a Kubernetes cluster to one or more external networks and preserving the network +isolation of the UDN and external virtual routing and forwarding instances (VRFs) segments. Consider the following +diagram: + +![](images/hybrid-ip-vrf-evpn.png) + +In this example a user has traditional Finance and HR networks. These networks are in their own VRFs, meaning they are +isolated from one another and are unable to communicate or even know about the other. These networks may overlap in IP +addressing. Additionally, the user has a Kubernetes cluster where they are migrating some traditional server/VM +workloads over to the Kubernetes platform. In this case, the user wants to preserve the same network isolation they had +previously, while also giving the Kubernetes based Finance and HR tenants connectivity to the legacy external networks. + +By combining EVPN and UDN this becomes possible. The blue Finance network UDN is created with the Kubernetes cluster, and +integrated into the user's EVPN fabric, extending it to the traditional Finance external network. The same is true for +the yellow HR network. The Finance and HR network isolation is preserved from the Kubernetes cluster outward to the +external networks. 
+ +### Extending Layer 2 UDNs into the provider network to allow VM migration + +Building upon the previous example, the network connectivity between a UDN and an external network can be done using +either Layer 3 (IP-VRF) or Layer 2 (MAC-VRF). With the former, routing occurs between entities within the Kubernetes +UDN and the corresponding external network, while with the latter, the UDN and the external network are both part of the +same layer 2 broadcast domain. VM migration relies on being a part of the same L2 segment in order to preserve MAC +address reachability as well as IP address consistency. With MAC-VRFs and EVPN it becomes possible to extend the +layer 2 network between the Kubernetes cluster and the outside world: + +![](images/l2-evpn-vm-migration.png) + +The image above depicts a Layer 2 UDN which not only exists across the worker nodes node-1 and node-2 but is also stretched +into Provider Network 1. In this scenario, vm-2 is able to migrate into node-1 on the UDN network, preserving the same IP +address it had in the external provider network. Similarly, there is another Provider Network 2 which may or may not +correspond to another UDN within the Kubernetes cluster. However, notice that the red and blue networks are both using +the same IP addressing scheme and sharing the same hardware; due to VRF isolation they are completely unaware of and +unable to communicate with each other. + +### Using EVPN as the Overlay for User Defined Networks + +By integrating into a customer's existing TOR spine and leaf architecture, Geneve can be disabled, and network +segmentation will still persist for east/west traffic due to VXLAN tunnels with EVPN. This is true for both IP-VRFs +and MAC-VRFs. This reduces packet overhead for customers, while also providing some other advantages that come with +EVPN, such as link redundancy and broadcast, unknown unicast, and multicast (BUM) traffic suppression. + +## Proposed Solution + +As mentioned in the previous sections, EVPN will continue to build upon the BGP support already implemented in +OVN-Kubernetes using FRR. This support includes integration with an OVN-Kubernetes API as well as an FRR-K8S API for +configuring BGP peering and routing for UDNs. FRR already supports EVPN, and similar API resources will be leveraged to +accomplish configuring FRR as the BGP/EVPN control plane. Recall that FRR relies on Linux netdevs to be configured in +order for EVPN to work. There are two configuration modes in FRR to accomplish this. + +### Multiple VXLAN Devices (MVD) + +MVD is the classic way that FRR maps netdevs to EVPN configuration. In this model we create the following Linux constructs +in order to use EVPN: + +1. A VRF device +2. Linux Bridge enslaved to the VRF +3. An SVI attached to the bridge (for IP-VRF) +4. A VXLAN device enslaved to the bridge +5. A VTEP IP configured locally (generally on a loopback interface) + +Devices 1-4 need to be configured for each network (UDN). Only a single VTEP IP is needed, although more than one +could be configured if it was desired to use multiple tunnels (uncommon). + +### Single VXLAN Device (SVD) + +SVD is a newer way that was created in order to solve scale issues around creating too many netdevs with MVD. In this model +only a single VXLAN device is created, along with a single Linux Bridge. VLANs are used within the Linux Bridge to segment +networks. The following devices are required: + +1. A VRF device (one per UDN) +2. Linux Bridge (single bridge with a VLAN per UDN) +3. 
An SVI attached to the bridge (for IP-VRF, one VLAN sub-interface per UDN) +4. A VXLAN device enslaved to the bridge (added in the UDN VLAN) +5. A VTEP IP configured locally (generally on a loopback interface) + +SVD is supposed to scale better than MVD, and is similar to how other physical routers allow a single VTEP to be used +for multiple VNIs. The one drawback with the SVD implementation is that by mapping VNIs to VLANs, we are limited to +4096 max VNIs per node, and thus limited to 4094 MAC+IP VRFs per node. It may be possible to extend SVD by creating +another bridge/VTEP pair and mapping the VLANs to VNIs > 4094. This needs to be investigated and confirmed. + +SVD is only supported in FRR9 and later. + +### FRR integration with OVN-Kubernetes + +OVN-Kubernetes will be in charge of managing and configuring these devices on the node. We do not want to support both +MVD and SVD. Due to the simplicity and scalability, we will choose to support SVD and accept the potential drawback of +a limited number of VRFs we can advertise. Additionally, this enhancement limits the scope of EVPN support to local gateway +mode, which means traffic will flow through the Linux Bridge (MAC-VRF) as well as through the Linux VRF (IP-VRF). In the +future, OVN will add support for also watching the Linux netdevs, and then configuring OVN to act as a VXLAN VTEP for +the UDNs. + +If we consider the SVD devices that are required for OVN-Kubernetes integration, recall that we already create: + * Linux VRF for every Layer2/Layer3 UDN + * An ovn-k8s-mpx interface connected to the VRF and plugged into the UDN worker switch + +These devices already give us the VRF and SVI devices we need for 2 of the 5 netdevs we need for EVPN. For an IP-VRF, +we just need a way to get the packet from the pod into the IP-VRF; it does not matter if we use the ovn-k8s-mpx interface +or if we use an SVI interface attached to the Linux Bridge. Therefore, we will never configure an IP address on the SVI +interface of the Linux bridge, and rely on ovn-k8s-mpx to get packets to the IP-VRF. + +For the remaining devices, the API will drive their creation, covered in a later section. + +### Workflow Description + +Tenants as well as admins are able to create UDNs and CUDNs for their namespace(s), respectively. However, only CUDNs +are allowed to be BGP advertised by admins. This trend will continue with EVPN, where it will require admin access in order +to enable EVPN for one or more UDNs. A typical workflow will be: + +1. Configure BGP peering via interacting with the FRR-K8S API for a given set of worker nodes. +2. Create a VTEP CR that defines the VTEP IP to be used with EVPN VXLAN. +3. Create a primary Layer 2 or Layer 3 CUDN. Within the CUDN CR, specify an EVPN overlay configuration. +4. Create a RouteAdvertisements CR to specify what routes should be advertised via EVPN for this UDN. + +### API Details + +### FRR-K8S + +FRR-K8S will need to be extended to allow for configuring specific EVPN FRR configuration. In order to advertise MAC-VRFs +the following configuration needs to happen in FRR: + +```bash +router bgp 64512 + ! + address-family l2vpn evpn + neighbor 192.168.1.0 activate + neighbor 192.168.1.0 allowas-in origin + advertise-all-vni +``` + +This configuration signals to FRR to advertise all VNIs detected for Layer 2 MAC-VRFs. It does this by looking at +the netdevs and finding Linux Bridges/VLANs within the node. 
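+
+For illustration only, below is a rough sketch of the kind of SVD netdev layout FRR would inspect on a node when
+`advertise-all-vni` is set. All device names, the VLAN ID, the VNI and the VTEP IP are made-up examples rather than the
+names or values OVN-Kubernetes will use; in practice OVN-Kubernetes will create and manage these devices based on the
+API described in the following sections:
+
+```bash
+# VRF for one UDN, bound to an illustrative routing table
+ip link add udn-a type vrf table 1010
+ip link set udn-a up
+
+# Single VLAN-aware Linux Bridge shared by all UDNs
+ip link add br-evpn type bridge
+ip link set br-evpn type bridge vlan_filtering 1
+ip link set br-evpn up
+
+# SVI for the UDN: a VLAN sub-interface of the bridge enslaved to the VRF (no IP address configured)
+ip link add link br-evpn name br-evpn.100 type vlan id 100
+ip link set br-evpn.100 master udn-a
+ip link set br-evpn.100 up
+
+# Single VXLAN device enslaved to the bridge, with the VTEP IP on a loopback
+ip addr add 100.64.0.10/32 dev lo
+ip link add vxlan-evpn type vxlan dstport 4789 external local 100.64.0.10 nolearning
+ip link set vxlan-evpn master br-evpn
+ip link set vxlan-evpn up
+
+# Map the UDN's VLAN to its VNI on the shared VXLAN device
+bridge link set dev vxlan-evpn vlan_tunnel on
+bridge vlan add dev vxlan-evpn vid 100
+bridge vlan add dev vxlan-evpn vid 100 tunnel_info id 100100
+```
+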
+
+For IP-VRFs, we need to be able to add the following configuration:
+
+```bash
+vrf udnA
+ vni 100
+exit-vrf
+!
+router bgp 64512 vrf udnA
+ !
+ address-family l2vpn evpn
+  route-target import 64512:100
+  route-target export 64512:100
+ exit-address-family
+exit
+```
+
+In the above configuration, "vrf udnA" refers to an IP-VRF, and we then need a router bgp vrf stanza activating EVPN
+for it. EVPN also utilizes BGP Route-Targets (RT) in order to import/export routes between IP-VRFs. We must configure at
+least the targets for the VRF itself so that it will import/export routes from other nodes in its own IP-VRF. In the
+future this route-target config may be extended to include other IP-VRFs (UDNs) in order to connect networks together,
+similar to the "Network Connect" feature that connects UDNs via OVN+Geneve.
+
+In order for OVN-Kubernetes to generate FRR-K8S configuration that includes this EVPN-specific configuration, the FRR-K8S
+API will need to be extended. Those API extensions will be designed and implemented within the FRR-K8S project.
+
+### VTEP CRD
+
+A VTEP CRD will be created which allows an admin to define VTEP IPs to be associated with an EVPN-enabled CUDN. In the
+future this VTEP CRD may be extended to other use cases, such as providing VTEP IPs for Geneve tunnels.
+
+```yaml
+apiVersion: k8s.ovn.org/v1
+kind: VTEP
+metadata:
+  name: evpn-vtep
+spec:
+  cidr: 100.64.0.0/24
+  mode: managed
+```
+
+The cidr field is mandatory. If the mode is "managed", then OVN-Kubernetes will handle allocating and assigning
+VTEP IPs per node. If the mode is not provided, or is "unmanaged", then it is left to some other provider to handle
+adding an IP address from the provided subnet to each node. In unmanaged mode, OVN-Kubernetes will find an interface on
+the node which has an IP address in the cidr, and use that IP address. Unmanaged mode may be preferred where a provider
+handles assigning VTEP IPs within its EVPN fabric. In this case the OVN-Kubernetes cluster is integrating into an already
+configured EVPN fabric, so VTEP IP provisioning may be done by the provider and configured for each node.
+
+The IP address assigned or found by OVN-Kubernetes will be annotated on the node as `k8s.ovn.org/: `.
+If any node does not have an IP address annotated for this VTEP, the VTEP CR will go into an error state.
+
+### CUDN CRD changes for EVPN
+
+This API change depends on the [No Overlay Feature](https://github.com/ovn-kubernetes/ovn-kubernetes/pull/5289) which will
+introduce a new Transport field that we can extend to use EVPN. The API change will look like this:
+
+```yaml
+apiVersion: k8s.ovn.org/v1
+kind: ClusterUserDefinedNetwork
+metadata:
+  name: l2-primary
+  labels:
+    bgp: enabled
+spec:
+  namespaceSelector:
+    matchLabels:
+      kubernetes.io/metadata.name: udn-test
+  network:
+    topology: Layer2
+    layer2:
+      role: Primary
+      subnets:
+        - 10.20.100.0/16
+    transport: EVPN
+    evpnConfiguration:
+      vtep: evpn-vtep
+      macVRF:
+        vni: 100
+        routeTarget: "65000:100"
+      ipVRF:
+        vni: 101
+        routeTarget: "65000:101"
+```
+
+In the above example, a new transport type "EVPN" is introduced. This is paired with the evpnConfiguration section, which
+specifies the name of the VTEP CR to use. We will only support specifying a single VTEP to use for EVPN.
+
+The macVRF field must be provided for Layer2 networks, and cannot be provided for Layer3. ipVRF must be provided for
+Layer3, and is optional for Layer2 if an IP-VRF is also desired for the Layer2 UDN. 
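+
+For contrast, a Layer3 CUDN would carry only the ipVRF section. The following is an illustrative sketch reusing the same
+proposed fields; the names, values, and the Layer3 subnet layout are examples only, not a finalized API:
+
+```yaml
+apiVersion: k8s.ovn.org/v1
+kind: ClusterUserDefinedNetwork
+metadata:
+  name: l3-primary
+spec:
+  namespaceSelector:
+    matchLabels:
+      kubernetes.io/metadata.name: udn-test-l3
+  network:
+    topology: Layer3
+    layer3:
+      role: Primary
+      subnets:
+        - cidr: 10.30.0.0/16
+          hostSubnet: 24
+    transport: EVPN
+    evpnConfiguration:
+      vtep: evpn-vtep
+      ipVRF:
+        vni: 201
+        routeTarget: "65000:201"
+```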
+
+The "vni" specified under each section will be used to determine the VNID for each EVPN segment.
+The VNI values must not overlap between any EVPN-enabled UDNs in the cluster.
+
+Furthermore, routeTarget may be configured in order to specify the route target to import/export for the UDN.
+The routeTarget field is optional, and if left unset, the routeTarget will be automatically determined in the format
+`:`.
+If the routeTarget is the same between UDNs of different VRF types (MAC and IP VRFs), the overlapping IDs will have no effect.
+However, if routeTarget is the same between UDNs of the same VRF type, then UDN routes will be leaked between UDNs
+within the leaves (Kubernetes nodes). It is recommended to handle route leaking within the spine itself when using
+eBGP or iBGP with route reflectors. However, in a full mesh iBGP environment, especially one without a TOR spine fabric,
+it is necessary to configure route leaking at the leaves (Kubernetes nodes). This may be accomplished by setting the same
+routeTarget value for multiple UDNs, or by adding extra FRR configuration to import other UDNs' route targets.
+
+Additionally, with Layer 2 MAC-VRFs stretched outside the cluster, we need a way to tell OVN-Kubernetes not to allocate
+certain IPs from the shared subnet by default. The Layer2 UDN already provides "ReservedSubnets" for this
+purpose. If a VM or other application is migrated from the external fabric over the L2 VPN, then it is able to use
+a static IP from this ReservedSubnets field. However, when a VM migrates it not only needs to preserve its IP address,
+but also its default gateway. OVN-Kubernetes uses the first IP in the subnet as the default gateway for pods. This may
+not align with the gateway IP used by a VM off-cluster, and in addition we use the second IP in the subnet as the
+ovn-k8s-mpx IP, which may also already be in use on the Layer 2 network off cluster. To address this, there are other
+fields already provided in the Layer2 UDN spec that can be leveraged:
+- DefaultGatewayIPs
+- InfrastructureSubnets
+
+These configurable fields should be considered when creating a MAC-VRF that will be extended outside the cluster.
+
+### Route Advertisement (RA) CRD changes
+
+Note, in order to use EVPN, a Route Advertisement CR must also be created which selects the CUDN.
+There are no foreseen changes required to the RA CRD.
+
+### Implementation Details
+
+#### VTEP
+
+When a VTEP CR is created in managed mode, ovnkube-cluster-manager will handle
+assigning a VTEP IP to each node. If the cidr range includes the Kubernetes node IP for a node, then the node IP will be
+used as the VTEP IP. Note, using the node IP should be avoided in most cases, as that IP will already be tied to a specific
+interface on the node, which prevents proper Layer 3 failure handling. While a node IP on a dedicated link could use
+bonding to survive a Layer 2 failure, if something goes wrong on the Layer 3 interface, or the leaf connected to that
+link goes down, then there is no failover.
+With EVPN, it is advantageous to assign the VTEP IP to a loopback interface, so that multihoming
+and failover handling can occur. If a link goes down to one leaf, BFD will fire, and there will be a mass withdrawal of
+routes, moving all traffic to a second leaf.
+
+ovnkube-cluster-manager will handle annotating the node with the assigned IP address. If the VTEP is in unmanaged mode,
+then ovnkube-cluster-manager will only handle checking that all nodes have an IP address annotated for this VTEP. 
If a +node is missing the IP, the VTEP will be updated with a failure status condition. Even if a single node fails, the other +healthy nodes with assigned VTEPs will be configured for EVPN correctly. + +For unmanaged, the ovnkube-node component will handle detecting the IP address on the Linux node and setting the node +annotation. + +#### EVPN + +When a UDN is created with EVPN configuration, the UDN controller in ovnkube-cluster-manager will check to ensure that +a VTEP exists for this EVPN. If one does not, then the NAD will not be rendered and the UDN will be put into error state. +Additionally, a check will be done to ensure that VNIs do not overlap between any other UDNs in the cluster. + +Once the NAD has been rendered, ovnkube-controller and ovnkube-node network controllers will be started. The ovnkube-node +network controller will detect that this network is EVPN enabled, and then create the correct network device configuration +in Linux. + +The BGP RA controller will be responsible for detecting when a BGP RA selects a CUDN that is EVPN enabled. Once it does, +it will generate the proper FRR-K8S configuration. + +For the rest of the examples in this section, assume there is a layer 2 UDN called "blue", with subnet 10.0.10.0/24. + +##### Node Configuration: MAC-VRF + IP-VRF Combination with Layer 2 UDN + +Once the VTEP IP is assigned, ovnkube-node will then handle configuring the following: +```bash +# VTEP IP assignment to loopback - only done in VTEP managed mode +ip addr add 100.64.0.1/32 dev lo + +# SVD bridge + VXLAN setup +ip link add br0 type bridge vlan_filtering 1 vlan_default_pvid 0 +ip link set br0 addrgenmode none +ip link set br0 address aa:bb:cc:00:00:64 +ip link add vxlan0 type vxlan dstport 4789 local 100.64.0.1 nolearning external vnifilter +ip link set vxlan0 addrgenmode none master br0 +ip link set vxlan0 address aa:bb:cc:00:00:64 +ip link set br0 up +ip link set vxlan0 up +bridge link set dev vxlan0 vlan_tunnel on neigh_suppress on learning off + +# Create the IP-VRF +# Map VLAN 11 <-> VNI 101 +bridge vlan add dev br0 vid 11 self +bridge vlan add dev vxlan0 vid 11 +bridge vni add dev vxlan0 vni 101 +bridge vlan add dev vxlan0 vid 11 tunnel_info id 101 + +# 802.1Q sub-interface for routing +ip link add br0.11 link br0 type vlan id 11 +ip link set br0.11 address aa:bb:cc:00:00:64 addrgenmode none + +# Bind to the UDN VRF +ip link add blue type vrf table 10 +ip link set br0.11 master blue +ip link set br0.11 up +ip link set blue up + +## Create the MAC-VRF +# 1. Map VLAN 12 <-> VNI 100 on the SVD bridge +bridge vlan add dev br0 vid 12 self +bridge vlan add dev vxlan0 vid 12 +bridge vni add dev vxlan0 vni 100 +bridge vlan add dev vxlan0 vid 12 tunnel_info id 100 + +# 2. 
Connect OVS to the Linux Bridge +ovs-vsctl add-port br-int blue -- set interface blue type=internal external-ids:iface-id=blue +ip link set blue master br0 +bridge vlan add dev blue vid 12 pvid untagged +ip link set blue up +``` + +The Linux configuration ends up looking like this: + +```ascii + +┌────────────────────────────── OVN-KUBERNETES DOMAIN ───────────────────────────────┐ +│ │ +│ +-----------+ +------------------+ +--------------------+ │ +│ | Pod(s) |--------| Logical Switch | | Logical Router | │ +│ +-----------+ +------------------+ +--------------------+ │ +│ │ │ +│ │ │ +│ +-------------------------------------+ +-----------------------------+ │ +│ | OVS br-int | | ovn-k8s-mpx (host iface) | │ +│ | (OVN datapath for this UDN) | | attached to VRF blue | │ +│ +-------------------------------------+ +-----------------------│-----+ │ +│ │ (blue internal port) │ │ +└─────────────────────┼──────────────────────────────────────────────────────┼──────┬┘ + │ │ + │ │ + │ │ + │ │ + │ │ + │ │ +┌─────────────────────┼─────────────────────── LINUX HOST / EVPN STACK ──────┼──────────────────────────────────────┐ +│ │ │ │ +│ ________| │ │ +│ │ │ │ +│ │ │ │ +│ │ vlan 12 (VNI 100) │ │ +│ ▼ │ │ +│ +----------------------+ +----------------------------------+ +▼------------------------------+ │ +│ | br0 (SVD) |──────| br0.11 (VLAN 11 / L3VNI 101) |──────>| VRF blue (IP-VRF for UDN) | │ +│ | vlan_filtering=1 | +----------------------------------+ +-------------------------------+ │ +│ | vxlan0 master port | ▲ │ +│ +----------------------+ │ │ +│ │ vlan→vni mappings │ │ +│ ▼ │ │ +│ +-----------------+ │ │ +│ | vxlan0 | local 100.64.0.1 dstport 4789 external │ │ +│ +-----------------+ │ │ +│ │ (VTEP) │ │ +│ └────────────────────────────────────────────────────────────────────────────────────────────┘ │ +│ Encapsulated EVPN traffic (VNI 100 & 101) │ +│ via 100.64.0.1 <-> 100.64.0.x peers │ +└────────────────────────────────────────────────────────────────────────────────────────────────────────────────── │ +``` + +The MAC address for the bridge is unique and will be automatically generated by OVN-Kubernetes. This MAC address is +known as the "router-mac" and is used by Type 5 routes to know what the destination MAC of the next hop should be. The +MAC can be the same on every node, but must be different per UDN. + +Furthermore, bridge and VXLAN link names may also change and will be decided by OVN-Kubernetes. + +While the IP-VRF uses pure routing to transmit traffic over the EVPN fabric, MAC-VRF relies on layer 2. +For that reason, the layer 2 OVN network needs to be extended into the EVPN fabric. +To do that, we connect br-int to the linux bridge for the MAC-VRF. This will allow layer 2 traffic to +travel through br-blue and then eventually into the EVPN fabric via the VNID 100. This enables a user to disable the +Geneve overlay and allow L2 communication between UDNs on different nodes via the MAC-VRF: + + +![](images/l2evpn.png) + +Note, the Layer 2 domain for the MAC-VRF may be extended into the provider's physical network, and not just extended across +Kubernetes nodes. This allows for VM migration and other layer 2 connectivity between entities external to the cluster +and entities within. + +ovnkube-controller will be responsible for configuring OVN, including the extra OVS internal port attached to the worker logical +switch. + +In addition to VTEP IP allocation, ovnkube-cluster-manager will be responsible for generating FRR-K8S config to enable +FRR with EVPN. 
The config for the above example would look something like this:
+
+```bash
+vrf blue
+ vni 101
+ rd 65000:101
+exit-vrf
+!
+router bgp 65000
+ !
+ ! Peer with spine using eBGP
+ neighbor 192.168.122.12 remote-as 65001
+ !
+ address-family ipv4 unicast
+  network 100.64.0.1/32
+ exit-address-family
+ ! MAC-VRF config start
+ address-family l2vpn evpn
+  vni 100
+   rd 65000:100
+   route-target import 65000:100
+   route-target export 65000:100
+  !
+  neighbor 192.168.122.12 activate
+  advertise-all-vni
+ exit-address-family
+exit
+! MAC-VRF config end
+! IP-VRF config start
+router bgp 65000 vrf blue
+ !
+ address-family ipv4 unicast
+  network 10.0.10.0/24
+ exit-address-family
+ !
+ !
+ address-family l2vpn evpn
+  advertise ipv4 unicast
+  route-target import 65000:101
+  route-target export 65000:101
+ exit-address-family
+exit
+! IP-VRF config end
+```
+The MAC-VRF configuration for the Layer 2 UDN in the example above is contained within the `address-family l2vpn evpn`
+stanza under the global `router bgp 65000`, denoted with the comments showing the beginning and end of the MAC-VRF
+configuration.
+FRR automatically detects VNIs via netlink thanks to the `advertise-all-vni` configuration, so it is not required to list
+the MAC-VRFs that should use EVPN. However, the `vni 100` section lists the VNI explicitly in order to
+configure the route-target. The `rd 65000:100` line specifies the route distinguisher. The route distinguisher is used to
+uniquely identify routes for each VPN. This value is chosen automatically by OVN-Kubernetes in the format
+`:`, which is the same format used by FRR itself.
+The entirety of this configuration section will enable Type 2 and Type 3 EVPN routes to be advertised.
+
+The IP-VRF configuration starts with the `router bgp 65000 vrf blue` stanza.
+The stanza indicates to FRR that VNI 101 is an IP-VRF and that it should advertise Type 5 routes
+for the subnet `10.0.10.0/24`. Notice that the entire Layer 2 UDN subnet is
+advertised from each node. This can result in suboptimal routing, as there may be an extra hop involved to deliver packets
+from the ECMP-chosen path for the UDN subnet to the node where the pod actually lives. This can be mitigated in the future
+by advertising either static or kernel /32 routes for each pod IP on the node; however, that is outside the scope of this
+enhancement.
+
+The RouteAdvertisements CRD will still work in conjunction with the EVPN CRD to determine what IPs should be advertised. 
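+
+Once FRR and the netdevs are in place, the resulting EVPN state can be verified on a node. The commands below are an
+illustrative check only and assume the device and VRF names used in the example above (`blue`, `vxlan0`, VNIs 100/101):
+
+```bash
+# Type 2/3/5 routes received and advertised in the EVPN address-family
+vtysh -c "show bgp l2vpn evpn route"
+
+# MAC entries learned for the MAC-VRF (VNI 100)
+vtysh -c "show evpn mac vni 100"
+
+# Type 5 routes imported into the kernel routing table of the IP-VRF
+ip route show vrf blue
+
+# Remote VTEPs learned on the VXLAN device
+bridge fdb show dev vxlan0
+```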
+ +#### IP-VRF + Layer 3 UDN + +A Layer 3 UDN with an IP-VRF is really just a subset of the previous example, as far as configuration of the node: + +```bash +# VTEP IP assignment to loopback - only done in VTEP managed mode +ip addr add 100.64.0.1/32 dev lo + +# SVD bridge + VXLAN setup +ip link add br0 type bridge vlan_filtering 1 vlan_default_pvid 0 +ip link set br0 addrgenmode none +ip link set br0 address aa:bb:cc:00:00:64 +ip link add vxlan0 type vxlan dstport 4789 local 100.64.0.1 nolearning external vnifilter +ip link set vxlan0 addrgenmode none master br0 +ip link set vxlan0 address aa:bb:cc:00:00:64 +ip link set br0 up +ip link set vxlan0 up +bridge link set dev vxlan0 vlan_tunnel on neigh_suppress on learning off + +# Create the IP-VRF +# Map VLAN 11 <-> VNI 101 +bridge vlan add dev br0 vid 11 self +bridge vlan add dev vxlan0 vid 11 +bridge vni add dev vxlan0 vni 101 +bridge vlan add dev vxlan0 vid 11 tunnel_info id 101 + +# 802.1Q sub-interface for routing +ip link add br0.11 link br0 type vlan id 11 +ip link set br0.11 address aa:bb:cc:00:00:64 addrgenmode none + +# Bind to the UDN VRF +ip link add blue type vrf table 10 +ip link set br0.11 master blue +ip link set br0.11 up +ip link set blue up +``` + +Note, it is not required to wire the OVN logical switch to the linux bridge in this case. It is also not required to +modify routes in ovn_cluster_router. Pod egress traffic should be rerouted towards mpx as is done today with BGP. + +The FRR configuration remains almost the same as the previous example, but with IP-VRF we no longer need the +`address-family l2vpn evpn` section under the global `router bgp 65000` section anymore: + +```bash +vrf blue + vni 101 + rd 65000:101 +exit-vrf +! +router bgp 65000 + ! + ! Peer with spine using eBGP + neighbor 192.168.122.12 remote-as 65001 + ! + address-family ipv4 unicast + network 100.64.0.1/32 + exit-address-family +exit +! IP-VRF config start +router bgp 65000 vrf blue + ! + address-family ipv4 unicast + network 10.0.10.0/24 + exit-address-family + ! + address-family l2vpn evpn + advertise ipv4 unicast + route-target import 65000:101 + route-target export 65000:101 + exit-address-family +exit +! IP-VRF config end +``` + +An IP-VRF with a layer 3 UDN would look something like this: + +![](images/l3evpn.png) + +In this case each node has its own layer 2 domain, and routing is used via the IP-VRF for inter-node UDN communication. + +#### MAC-VRF + Layer 2 UDN + +With only a MAC-VRF it is also a subset of the previous node configuration: + +```bash +# VTEP IP assignment to loopback - only done in VTEP managed mode +ip addr add 100.64.0.1/32 dev lo + +# SVD bridge + VXLAN setup +ip link add br0 type bridge vlan_filtering 1 vlan_default_pvid 0 +ip link set br0 addrgenmode none +ip link set br0 address aa:bb:cc:00:00:64 +ip link add vxlan0 type vxlan dstport 4789 local 100.64.0.1 nolearning external vnifilter +ip link set vxlan0 addrgenmode none master br0 +ip link set vxlan0 address aa:bb:cc:00:00:64 +ip link set br0 up +ip link set vxlan0 up +bridge link set dev vxlan0 vlan_tunnel on neigh_suppress on learning off + +## Create the MAC-VRF +# 1. Map VLAN 12 <-> VNI 100 on the SVD bridge +bridge vlan add dev br0 vid 12 self +bridge vlan add dev vxlan0 vid 12 +bridge vni add dev vxlan0 vni 100 +bridge vlan add dev vxlan0 vid 12 tunnel_info id 100 + +# 2. 
Connect OVS to the Linux Bridge +ovs-vsctl add-port br-int blue -- set interface blue type=internal external-ids:iface-id=blue +ip link set blue master br0 +bridge vlan add dev blue vid 12 pvid untagged +ip link set blue up +``` + +The FRR configuration is also a subset of the original config: + +```bash +router bgp 65000 + ! Peer with spine using eBGP + neighbor 192.168.122.12 remote-as 65001 + ! + address-family ipv4 unicast + network 100.64.0.1/32 + exit-address-family + ! + address-family l2vpn evpn + vni 100 + rd 65000:100 + route-target import 65000:100 + route-target export 65000:100 + ! + neighbor 192.168.122.12 activate + advertise-all-vni + exit-address-family +exit +``` + +Notice the vrf stanzas are no longer needed. + +Architecturally, the traffic pattern and topology will look the same as the diagram in the +[MAC-VRF + IP-VRF Combination with Layer 2 UDN](#l2evpn-anchor) section. + +### Feature Compatibility + +#### Multiple External Gateways (MEG) + +Not supported. + +#### Egress IP + +Not supported. + +#### Services + +Full support for cluster IP access from pods. Limited support for external service access (node port, external IP, +load balancer IP). +MetalLB does not currently have a way to specify per VRF advertisements to the same BGP peer. Therefore, +for users to advertise an external IP or LoadBalancer IP, they must configure FRR-K8S to manually advertise it. A user +may also configure FRR-K8S to advertise cluster IP as well externally into the EVPN fabric. + +MetalLB may be extended in the future to support per-VRF advertisement. + +#### Egress Service + +Not supported. + +#### Egress Firewall + +Full support. + +#### Egress QoS + +Full support. + +#### Network Policy/ANP + +Full support. + +#### IPSec + +Not supported. + +#### Multicast + +IP-VRF will not be supported for Multicast. Although OVN can handle router forwarding of multicast traffic, the kernel +routing will not be configured correctly. Multicast within IP-VRF is a less common use case than MAC-VRF. Multicast +within a MAC-VRF will be supported. The OVN logical switch will handle IGMP snooping and forwarding unknown multicast +with the logical_switch configuration set to `other_config:mcast_flood_unregistered=true` and `other_config:mcast_snoop=true`. +The linux bridge will only have the VXLAN VTEP and OVN internal port connected to it, so it is not necessary to configure +IGMP snooping there. Type 3 EVPN routes will be sent to announce VPN/VTEP membership and trigger ingress replication to +flood multicast packets to the right VTEPs across the fabric. The linux bridge will simply "flood" the packet to the OVS +internal port to the OVN logical switch. The OVN logical switch will then rely on IGMP snooping to limit which pods to +send the packet to. + +### Testing Details + +The EVPN feature will require E2E tests to be written which will simulate a spine and leaf topology that the KIND Kubernetes +nodes are attached to. From there tests will be added that will create UDN+BGP+EVPN and test the following: + +1. UDN pods are able to talk to external applications on the same VPN. +2. UDN pods are unable to talk to external applications on a different VPN. +3. UDN pods are able to talk to other UDN pods on the same network without a Geneve overlay via EVPN. +4. The above tests will apply for both IP-VRF and MAC-VRF EVPN types. +5. For IP+MAC-VRF, a test will be added to ensure VM migration between two Kubernetes nodes with EVPN. 
This includes
+ensuring that TCP connections are not broken and that packet loss during migration is minimal.
+6. Testing with Multicast on MAC-VRFs.
+
+### Documentation Details
+
+BGP documentation (including a user guide) needs to be completed first, with details on how to configure BGP with UDNs.
+Following that, EVPN documentation will be added to show users how to configure EVPN and have it integrate with a
+spine and leaf topology.
+
+## Risks, Known Limitations and Mitigations
+
+One risk is interoperability with an external provider network's EVPN infrastructure. Although BGP+EVPN is a standardized
+protocol, there may be nuances where certain features are not available or do not work as expected in FRR. There is no
+current FRR development expertise in our group, so we will have to rely on the FRR community for help as we ramp up.
+
+The same drawbacks highlighted in the BGP enhancement exist here, namely:
+
+* Increased complexity of our SDN networking solution to support more complex networking.
+* Increased support complexity due to integration with the user's provider network.
+
+Other considerations include FRR deployment. If the default cluster network is relying on EVPN or BGP to provide network
+connectivity, then FRR must be started and bootstrapped by the time the kubelet comes up. This includes considerations around
+node reboot, as well as fresh cluster install. The means to manage, deploy, and maintain FRR are outside the scope of
+OVN-Kubernetes, but may be handled by another platform-specific operator. For example, MetalLB may be used to install
+FRR for day 2 operations.
+
+Limitations include support for Local gateway mode only.
+
+There are other aspects to consider around Kubernetes services. Today OVN-Kubernetes works with MetalLB, and MetalLB is
+responsible for advertising externally exposed services across the BGP fabric. In OVN-Kubernetes we treat services as though
+they belong to a specific UDN. This is due to the fact that a service is namespace scoped, and a namespace belongs to either
+the default cluster network or a UDN. However, when a user exposes a service externally via a nodeport, loadbalancer, or
+external IP, that service is now reachable externally over the default VRF (advertised via MetalLB). MetalLB has no concept
+of VRFs or UDNs, but it could be extended to allow advertising services in different VRFs. Until this support exists,
+external services may not be advertised by MetalLB over non-default VRF EVPNs. However, it may be desirable for OVN-Kubernetes
+to fill this void somewhat, by advertising the cluster IP of services on UDNs via BGP. This can be a future enhancement after
+we figure out what can be done in MetalLB.
+
+MEG will not be supported with EVPN as MEG is only supported in shared gateway mode, while EVPN is limited to local gateway mode.
+
+## OVN Kubernetes Version Skew
+
+TBD
+
+## Alternatives
+
+There is another alternative called [OpenPERouter](https://openperouter.github.io/) which manages EVPN FRR configuration
+in another Linux network namespace. A VRF-Lite style configuration is done by connecting veth devices between the default
+Linux network namespace and the OpenPERouter namespace. The FRR in the default network namespace peers with this
+OpenPERouter in order to advertise routes from the host. For OVN-Kubernetes to integrate, we would need to configure
+VRF-Lite on our side and connect to the OpenPERouter. While this does provide an easy way to plug in, the OpenPERouter
+approach has several major drawbacks:
+
+1. 
Lack of OVN integration - We eventually want shared gateway mode to work, where OVN will be handling encapsulation of +packets. This enables things like hardware offload. This is not possible with OpenPERouter. + +2. Management complexity - Adding additional FRR instance running in another network namespace, and plumbing virtual +interfaces between the host/OVN and this other namespace increases resource footprint, and makes it harder to debug and +manage. The Linux interfaces on the host must be moved to this other namespace as well. + +For OVN-Kubernetes, the cost of implementing the code to manage the netdevs, and configure FRR-K8S outweighs the +drawbacks listed above, and therefore OpenPERouter is not a viable alternative. + +## References + +- [FRR EVPN Configuration Guide](https://docs.frrouting.org/en/latest/evpn.html) diff --git a/go-controller/hack/build-go.sh b/go-controller/hack/build-go.sh index 13a12e0bf9..b84a4a2c2f 100755 --- a/go-controller/hack/build-go.sh +++ b/go-controller/hack/build-go.sh @@ -16,8 +16,8 @@ build_binaries() { # Add a buildid to the executable - needed by rpmbuild BUILDID=${BUILDID:-0x$(head -c20 /dev/urandom|od -An -tx1|tr -d ' \n')} - GIT_COMMIT=$(git rev-parse HEAD) - GIT_BRANCH=$(git rev-parse --symbolic-full-name --abbrev-ref HEAD) + GIT_COMMIT=${GIT_COMMIT:-$(git rev-parse HEAD 2>/dev/null || echo "unknown")} + GIT_BRANCH=${GIT_BRANCH:-$(git rev-parse --symbolic-full-name --abbrev-ref HEAD 2>/dev/null || echo "unknown")} BUILD_USER=$(whoami) BUILD_DATE=$(date +"%Y-%m-%d") K8S_CLIENT_VERSION=$(grep 'k8s.io/client-go' ${OVN_KUBE_ROOT}/go.mod | head -1 |cut -f2 -d' ') diff --git a/go-controller/hack/test-go.sh b/go-controller/hack/test-go.sh index f2df39f672..c5f6341dc8 100755 --- a/go-controller/hack/test-go.sh +++ b/go-controller/hack/test-go.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash set -e +set -o pipefail source "$(dirname "${BASH_SOURCE}")/init.sh" diff --git a/go-controller/pkg/allocator/ip/allocator.go b/go-controller/pkg/allocator/ip/allocator.go index 7205ecc677..4228a60b5e 100644 --- a/go-controller/pkg/allocator/ip/allocator.go +++ b/go-controller/pkg/allocator/ip/allocator.go @@ -55,6 +55,11 @@ func IsErrAllocated(err error) bool { return errors.Is(err, ErrAllocated) } +// IsErrFull returns true if err is of type ErrFull +func IsErrFull(err error) bool { + return errors.Is(err, ErrFull) +} + type ErrNotInRange struct { ValidRange string } diff --git a/go-controller/pkg/allocator/pod/pod_annotation.go b/go-controller/pkg/allocator/pod/pod_annotation.go index c5d150f519..5e687e1a00 100644 --- a/go-controller/pkg/allocator/pod/pod_annotation.go +++ b/go-controller/pkg/allocator/pod/pod_annotation.go @@ -445,6 +445,16 @@ func allocatePodAnnotationWithRollback( hasIPAMClaim = ipamClaim != nil && len(ipamClaim.Status.IPs) > 0 } + defer func() { + if ipamClaim == nil || claimsReconciler == nil { + return + } + updatedClaim := claimsReconciler.UpdateIPAMClaimStatus(ipamClaim, podAnnotation, pod.Name, err) + if reconcileErr := claimsReconciler.Reconcile(ipamClaim, updatedClaim, ipAllocator); reconcileErr != nil { + err = errors.Join(err, fmt.Errorf("failed to reconcile IPAM claim %s/%s: %w", ipamClaim.Namespace, ipamClaim.Name, reconcileErr)) + } + }() + if hasIPAM && hasStaticIPRequest { if err = validateStaticIPRequest(netInfo, network, ipamClaim, podDesc); err != nil { return @@ -548,12 +558,6 @@ func allocatePodAnnotationWithRollback( podAnnotation = tentative } - if ipamClaim != nil && err == nil { - newIPAMClaim := ipamClaim.DeepCopy() - 
newIPAMClaim.Status.IPs = util.StringSlice(podAnnotation.IPs) - err = claimsReconciler.Reconcile(ipamClaim, newIPAMClaim, ipAllocator) - } - return } diff --git a/go-controller/pkg/allocator/pod/pod_annotation_test.go b/go-controller/pkg/allocator/pod/pod_annotation_test.go index 608c29f7e2..da7c1979bc 100644 --- a/go-controller/pkg/allocator/pod/pod_annotation_test.go +++ b/go-controller/pkg/allocator/pod/pod_annotation_test.go @@ -86,6 +86,17 @@ func (c *persistentIPsStub) FindIPAMClaim(claimName string, namespace string) (* return &ipamClaim, nil } +func (c *persistentIPsStub) UpdateIPAMClaimStatus(ipamClaim *ipamclaimsapi.IPAMClaim, podAnnotation *util.PodAnnotation, podName string, allocationErr error) *ipamclaimsapi.IPAMClaim { + updatedClaim := ipamClaim.DeepCopy() + updatedClaim.Status.OwnerPod = &ipamclaimsapi.OwnerPod{Name: podName} + if allocationErr != nil { + updatedClaim.Status.IPs = []string{} + } else if podAnnotation != nil && len(podAnnotation.IPs) > 0 { + updatedClaim.Status.IPs = util.StringSlice(podAnnotation.IPs) + } + return updatedClaim +} + func ipamClaimKey(namespace string, claimName string) string { return fmt.Sprintf("%s/%s", namespace, claimName) } diff --git a/go-controller/pkg/clustermanager/clustermanager.go b/go-controller/pkg/clustermanager/clustermanager.go index 3ca660c9f5..de03ddea0b 100644 --- a/go-controller/pkg/clustermanager/clustermanager.go +++ b/go-controller/pkg/clustermanager/clustermanager.go @@ -16,11 +16,13 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/dnsnameresolver" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/egressservice" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/endpointslicemirror" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/networkconnect" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/routeadvertisements" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/status_manager" udncontroller "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/userdefinednetwork" udntemplate "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/userdefinednetwork/template" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + networkconnectclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/clusternetworkconnect/v1/apis/clientset/versioned" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/networkmanager" @@ -48,6 +50,8 @@ type ClusterManager struct { dnsNameResolverController *dnsnameresolver.Controller // Controller for managing user-defined-network CRD userDefinedNetworkController *udncontroller.Controller + // Controller for managing cluster-network-connect CRD + networkConnectController *networkconnect.Controller // event recorder used to post events to k8s recorder record.EventRecorder @@ -89,12 +93,13 @@ func NewClusterManager( } cm.networkManager = networkmanager.Default() + var tunnelKeysAllocator *id.TunnelKeysAllocator if config.OVNKubernetesFeature.EnableMultiNetwork { // tunnelKeysAllocator is now only used for NAD tunnel keys allocation, but will be reused // for Connecting UDNs. So we initialize it here and pass it to the networkManager. // The same instance should be initialized only once and passed to all the // users of tunnel-keys. 
- tunnelKeysAllocator, err := initTunnelKeysAllocator(ovnClient.NetworkAttchDefClient) + tunnelKeysAllocator, err = initTunnelKeysAllocator(ovnClient.NetworkAttchDefClient, ovnClient.NetworkConnectClient) if err != nil { return nil, fmt.Errorf("failed to initialize tunnel keys allocator: %w", err) } @@ -171,6 +176,10 @@ func NewClusterManager( } } + if util.IsNetworkConnectEnabled() { + cm.networkConnectController = networkconnect.NewController(wf, ovnClient, cm.networkManager.Interface(), tunnelKeysAllocator) + } + if util.IsRouteAdvertisementsEnabled() { cm.raController = routeadvertisements.NewController(cm.networkManager.Interface(), wf, ovnClient) } @@ -233,6 +242,12 @@ func (cm *ClusterManager) Start(ctx context.Context) error { } } + if cm.networkConnectController != nil { + if err := cm.networkConnectController.Start(); err != nil { + return err + } + } + if cm.raController != nil { err := cm.raController.Start() if err != nil { @@ -265,6 +280,9 @@ func (cm *ClusterManager) Stop() { if util.IsNetworkSegmentationSupportEnabled() { cm.userDefinedNetworkController.Shutdown() } + if cm.networkConnectController != nil { + cm.networkConnectController.Stop() + } if cm.raController != nil { cm.raController.Stop() cm.raController = nil @@ -294,7 +312,7 @@ func (cm *ClusterManager) Reconcile(name string, old, new util.NetInfo) error { // It will be shared across multiple controllers and should account for different object types. // Good news is that we don't care about missing events, because we only need to reserve ids that are already // annotated, and no one else can annotate them except ClusterManager. -func initTunnelKeysAllocator(nadClient networkattchmentdefclientset.Interface) (*id.TunnelKeysAllocator, error) { +func initTunnelKeysAllocator(nadClient networkattchmentdefclientset.Interface, cncClient networkconnectclientset.Interface) (*id.TunnelKeysAllocator, error) { tunnelKeysAllocator := id.NewTunnelKeyAllocator("TunnelKeys") existingNADs, err := nadClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("").List(context.TODO(), metav1.ListOptions{}) @@ -323,5 +341,22 @@ func initTunnelKeysAllocator(nadClient networkattchmentdefclientset.Interface) ( } } } + if util.IsNetworkConnectEnabled() { + existingCNCs, err := cncClient.K8sV1().ClusterNetworkConnects().List(context.TODO(), metav1.ListOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to list existing CNCs: %w", err) + } + for _, cnc := range existingCNCs.Items { + tunnelID, err := util.ParseNetworkConnectTunnelKeyAnnotation(&cnc) + if err != nil { + return nil, fmt.Errorf("failed to parse annotated tunnel ID: %w", err) + } + if tunnelID != 0 { + if err = tunnelKeysAllocator.ReserveKeys(cnc.Name, []int{tunnelID}); err != nil { + return nil, fmt.Errorf("failed to reserve tunnel ID %d for CNC %s: %w", tunnelID, cnc.Name, err) + } + } + } + } return tunnelKeysAllocator, nil } diff --git a/go-controller/pkg/clustermanager/clustermanager_test.go b/go-controller/pkg/clustermanager/clustermanager_test.go index f1bcb28a16..58e234e67f 100644 --- a/go-controller/pkg/clustermanager/clustermanager_test.go +++ b/go-controller/pkg/clustermanager/clustermanager_test.go @@ -18,6 +18,9 @@ import ( hotypes "github.com/ovn-org/ovn-kubernetes/go-controller/hybrid-overlay/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + networkconnect "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/clusternetworkconnect/v1" + apitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/types" + udnv1 
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/generator/udn" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" @@ -860,7 +863,7 @@ var _ = ginkgo.Describe("Cluster Manager", func() { clientSet := util.GetOVNClientset(nad1, nad2) // init the allocator that should reserve already allocated keys for test1 - allocator, err := initTunnelKeysAllocator(clientSet.NetworkAttchDefClient) + allocator, err := initTunnelKeysAllocator(clientSet.NetworkAttchDefClient, clientSet.NetworkConnectClient) gomega.Expect(err).NotTo(gomega.HaveOccurred()) // check that reserving different keys for test2 will fail err = allocator.ReserveKeys("test1", []int{16711685, 16715779}) @@ -887,6 +890,330 @@ var _ = ginkgo.Describe("Cluster Manager", func() { }) gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) + + ginkgo.It("check for CNC tunnel keys allocations", func() { + app.Action = func(_ *cli.Context) error { + config.OVNKubernetesFeature.EnableNetworkConnect = true + config.OVNKubernetesFeature.EnableNetworkSegmentation = true + config.OVNKubernetesFeature.EnableMultiNetwork = true + // CNC uses networkID 4097 (4096+1) which allocates from the idsAllocator range + // The idsAllocator starts at 16715779 (16711683 + 4096) + // create CNC with already allocated tunnel key + cnc1 := &networkconnect.ClusterNetworkConnect{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cnc1", + Annotations: map[string]string{ + util.OvnConnectRouterTunnelKeyAnnotation: "16715779", + }, + }, + } + // create CNC without tunnel key annotation + cnc2 := &networkconnect.ClusterNetworkConnect{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cnc2", + }, + } + clientSet := util.GetOVNClientset(cnc1, cnc2) + + // init the allocator that should reserve already allocated key for cnc1 + allocator, err := initTunnelKeysAllocator(clientSet.NetworkAttchDefClient, clientSet.NetworkConnectClient) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // check that reserving different keys for cnc1 will fail + err = allocator.ReserveKeys("cnc1", []int{16715780}) + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("can't reserve ids [16715780] for the resource cnc1. 
It is already allocated with different ids [16715779]")) + // now try to allocate key for cnc1 (using networkID 4097 as CNCs do) + // and check that returned ID is the already reserved one + ids, err := allocator.AllocateKeys("cnc1", 4097, 1) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Expect(ids).To(gomega.Equal([]int{16715779})) + // now allocate id for cnc2 (which had no annotation, also using networkID 4097) + ids, err = allocator.AllocateKeys("cnc2", 4097, 1) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Expect(ids).To(gomega.Equal([]int{16715780})) + // now try cnc3 to make sure IDs of cnc1 and cnc2 are not allocated again + ids, err = allocator.AllocateKeys("cnc3", 4097, 1) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Expect(ids).To(gomega.Equal([]int{16715781})) + return nil + } + + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR, + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("check for combined NAD and CNC tunnel keys allocations", func() { + app.Action = func(_ *cli.Context) error { + config.OVNKubernetesFeature.EnableNetworkConnect = true + config.OVNKubernetesFeature.EnableNetworkSegmentation = true + config.OVNKubernetesFeature.EnableMultiNetwork = true + // create NAD with already allocated tunnel keys + // NAD with networkID 2 gets keys: [16711685 (preserved), 16715779 (idsAllocator)] + nad1 := testing.GenerateNAD("test1", "test1", "test", ovntypes.Layer2Topology, + "10.0.0.0/24", ovntypes.NetworkRolePrimary) + nad1.Annotations = map[string]string{ + ovntypes.OvnNetworkTunnelKeysAnnotation: "[16711685,16715779]", + } + // create CNC with already allocated tunnel key + // CNC uses networkID 4097, so it gets keys from idsAllocator range (16715779+) + cnc1 := &networkconnect.ClusterNetworkConnect{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cnc1", + Annotations: map[string]string{ + util.OvnConnectRouterTunnelKeyAnnotation: "16715780", + }, + }, + } + clientSet := util.GetOVNClientset(nad1, cnc1) + + // init the allocator that should reserve keys for both NAD and CNC + allocator, err := initTunnelKeysAllocator(clientSet.NetworkAttchDefClient, clientSet.NetworkConnectClient) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // verify NAD keys are reserved (networkID 2 => first key from preserved range) + ids, err := allocator.AllocateKeys("test1", 2, 2) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Expect(ids).To(gomega.Equal([]int{16711685, 16715779})) + // verify CNC key is reserved (networkID 4097 => all keys from idsAllocator) + ids, err = allocator.AllocateKeys("cnc1", 4097, 1) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Expect(ids).To(gomega.Equal([]int{16715780})) + // test conflict: CNC tries to reserve NAD's random pool key (16715779) + err = allocator.ReserveKeys("conflicting-cnc", []int{16715779}) + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("already reserved")) + // test conflict: NAD tries to reserve CNC's key (16715780) + err = allocator.ReserveKeys("conflicting-nad", []int{16715780}) + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("already reserved")) + // allocate new keys for a new NAD (networkID 3) and ensure reserved keys are not reused + ids, err = allocator.AllocateKeys("newnetwork", 3, 2) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + // first key: 16711686 (preserved range for networkID 3) + // second key: 16715781 
(skipping 16715779 for test1 and 16715780 for cnc1) + gomega.Expect(ids).To(gomega.Equal([]int{16711686, 16715781})) + // allocate new keys for a resource with networkID > 4096 (like CNCs do) + // this should get ALL keys from the random pool, no deterministic key + ids, err = allocator.AllocateKeys("newresource", 4097, 2) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + // both keys from random pool: 16715782, 16715783 (skipping all previously allocated) + gomega.Expect(ids).To(gomega.Equal([]int{16715782, 16715783})) + return nil + } + + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR, + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("CNC tunnel key and subnet allocations at cluster manager (re)start", func() { + app.Action = func(ctx *cli.Context) error { + // Create two namespaces that the CNC's Primary UDN selector will match + // Note: k8s.ovn.org/primary-user-defined-network label is required for primary UDNs + ns1 := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "frontend-ns", + Labels: map[string]string{ + "tier": "frontend", + "k8s.ovn.org/primary-user-defined-network": "", + }, + }, + } + ns2 := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "backend-ns", + Labels: map[string]string{ + "tier": "backend", + "k8s.ovn.org/primary-user-defined-network": "", + }, + }, + } + + // Create two primary UDNs - the UDN controller will create the corresponding NADs + // UDN1 gets network ID 2 (layer3_2), UDN2 gets network ID 3 (layer3_3) + udn1 := &udnv1.UserDefinedNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "frontend-udn", + Namespace: "frontend-ns", + }, + Spec: udnv1.UserDefinedNetworkSpec{ + Topology: udnv1.NetworkTopologyLayer3, + Layer3: &udnv1.Layer3Config{ + Role: udnv1.NetworkRolePrimary, + Subnets: []udnv1.Layer3Subnet{ + {CIDR: "10.128.0.0/16", HostSubnet: 24}, + }, + }, + }, + } + udn2 := &udnv1.UserDefinedNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "backend-udn", + Namespace: "backend-ns", + }, + Spec: udnv1.UserDefinedNetworkSpec{ + Topology: udnv1.NetworkTopologyLayer3, + Layer3: &udnv1.Layer3Config{ + Role: udnv1.NetworkRolePrimary, + Subnets: []udnv1.Layer3Subnet{ + {CIDR: "10.129.0.0/16", HostSubnet: 24}, + }, + }, + }, + } + + // Create a CNC with pre-populated tunnel key annotation + // This simulates a cluster manager restart scenario where tunnel key was already allocated + // Note: We don't pre-populate subnet annotation because network IDs are assigned dynamically + cnc := &networkconnect.ClusterNetworkConnect{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cnc", + Annotations: map[string]string{ + // Pre-populate tunnel key (CNC uses networkID 4097+, so from idsAllocator range) + util.OvnConnectRouterTunnelKeyAnnotation: "16715781", + }, + }, + Spec: networkconnect.ClusterNetworkConnectSpec{ + NetworkSelectors: apitypes.NetworkSelectors{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "tier", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"frontend", "backend"}, + }, + }, + }, + }, + }, + }, + ConnectSubnets: []networkconnect.ConnectSubnet{ + {CIDR: "192.168.0.0/16", NetworkPrefix: 24}, + }, + Connectivity: []networkconnect.ConnectivityType{networkconnect.PodNetwork}, + }, + } + + kubeFakeClient := fake.NewSimpleClientset(ns1, ns2) + fakeClient := 
util.GetOVNClientset(udn1, udn2, cnc) + fakeClient.KubeClient = kubeFakeClient + + _, err := config.InitConfig(ctx, nil, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + config.Kubernetes.HostNetworkNamespace = "" + config.OVNKubernetesFeature.EnableMultiNetwork = true + config.OVNKubernetesFeature.EnableNetworkSegmentation = true + config.OVNKubernetesFeature.EnableNetworkConnect = true + + f, err = factory.NewClusterManagerWatchFactory(fakeClient.GetClusterManagerClientset()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = f.Start() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + c, cancel := context.WithCancel(ctx.Context) + defer cancel() + clusterManager, err := NewClusterManager(fakeClient.GetClusterManagerClientset(), f, "identity", nil) + gomega.Expect(clusterManager).NotTo(gomega.BeNil()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = clusterManager.Start(c) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + defer clusterManager.Stop() + + // Verify that cluster manager preserved the tunnel key annotation on the CNC + gomega.Eventually(func() (int, error) { + updatedCNC, err := fakeClient.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Get( + context.TODO(), "test-cnc", metav1.GetOptions{}) + if err != nil { + return 0, err + } + tunnelKeyStr := updatedCNC.Annotations[util.OvnConnectRouterTunnelKeyAnnotation] + if tunnelKeyStr == "" { + return 0, fmt.Errorf("tunnel key annotation not set") + } + return strconv.Atoi(tunnelKeyStr) + }, 5).Should(gomega.Equal(16715781)) // Should preserve the pre-populated value + + // Wait for NADs to be created by UDN controller and get their network IDs + var frontendNetworkID, backendNetworkID string + gomega.Eventually(func() error { + nad1, err := fakeClient.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("frontend-ns").Get( + context.TODO(), "frontend-udn", metav1.GetOptions{}) + if err != nil { + return err + } + nad2, err := fakeClient.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("backend-ns").Get( + context.TODO(), "backend-udn", metav1.GetOptions{}) + if err != nil { + return err + } + frontendNetworkID = nad1.Annotations[ovntypes.OvnNetworkIDAnnotation] + backendNetworkID = nad2.Annotations[ovntypes.OvnNetworkIDAnnotation] + if frontendNetworkID == "" || backendNetworkID == "" { + return fmt.Errorf("network IDs not yet assigned") + } + return nil + }, 10).Should(gomega.Succeed()) + + // Verify that cluster manager allocated subnets for both networks + // Use the actual network IDs from the NADs + gomega.Eventually(func() (string, error) { + updatedCNC, err := fakeClient.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Get( + context.TODO(), "test-cnc", metav1.GetOptions{}) + if err != nil { + return "", err + } + return updatedCNC.Annotations["k8s.ovn.org/network-connect-subnet"], nil + }, 10).Should(gomega.SatisfyAll( + // Should have subnet allocations for both networks using their actual network IDs + gomega.ContainSubstring(fmt.Sprintf("layer3_%s", frontendNetworkID)), + gomega.ContainSubstring(fmt.Sprintf("layer3_%s", backendNetworkID)), + // Both subnets should be from the connect subnet range (192.168.0.0/16) with /24 prefix + gomega.MatchRegexp(`"layer3_\d+":\{"ipv4":"192\.168\.\d+\.0/24"\}.*"layer3_\d+":\{"ipv4":"192\.168\.\d+\.0/24"\}`), + )) + + // Verify the tunnel key is preserved after CNC update (triggers re-reconciliation) + // Update CNC with a label to trigger reconciliation + updatedCNC, err := 
fakeClient.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Get( + context.TODO(), "test-cnc", metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + if updatedCNC.Labels == nil { + updatedCNC.Labels = make(map[string]string) + } + updatedCNC.Labels["test-update"] = "trigger-reconcile" + _, err = fakeClient.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Update( + context.TODO(), updatedCNC, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Verify tunnel key is still preserved after reconciliation + gomega.Eventually(func() (int, error) { + cnc, err := fakeClient.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Get( + context.TODO(), "test-cnc", metav1.GetOptions{}) + if err != nil { + return 0, err + } + tunnelKeyStr := cnc.Annotations[util.OvnConnectRouterTunnelKeyAnnotation] + if tunnelKeyStr == "" { + return 0, fmt.Errorf("tunnel key annotation not set") + } + return strconv.Atoi(tunnelKeyStr) + }, 5).Should(gomega.Equal(16715781)) // Should still be the same pre-populated value + + return nil + } + + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR, + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) }) ginkgo.Context("Node gateway router port IP allocations", func() { diff --git a/go-controller/pkg/clustermanager/egressip_controller.go b/go-controller/pkg/clustermanager/egressip_controller.go index d7296fc8ee..7b0f2a2e39 100644 --- a/go-controller/pkg/clustermanager/egressip_controller.go +++ b/go-controller/pkg/clustermanager/egressip_controller.go @@ -18,6 +18,7 @@ import ( corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/tools/record" @@ -211,6 +212,31 @@ func (eIPC *egressIPClusterController) executeCloudPrivateIPConfigOps(egressIPNa if cloudPrivateIPConfig.GetDeletionTimestamp() != nil && !cloudPrivateIPConfig.GetDeletionTimestamp().IsZero() { return fmt.Errorf("cloud update request failed, CloudPrivateIPConfig: %s is being deleted", cloudPrivateIPConfigName) } + + // Handle a scenario in which the object exists in a failed state by removing it if the node it was assigned to no longer exists + assignedCondition := meta.FindStatusCondition(cloudPrivateIPConfig.Status.Conditions, string(ocpcloudnetworkapi.Assigned)) + if cloudPrivateIPConfig.Status.Node != "" && assignedCondition != nil && assignedCondition.Status == metav1.ConditionFalse { + _, err := eIPC.watchFactory.GetNode(cloudPrivateIPConfig.Status.Node) + if err != nil && apierrors.IsNotFound(err) { + klog.Warningf("CloudPrivateIPConfig: %s is in Failed state (reason: %s) and node %s no longer exists, deleting to allow retry", + cloudPrivateIPConfigName, assignedCondition.Message, cloudPrivateIPConfig.Status.Node) + eIPRef := corev1.ObjectReference{ + Kind: "EgressIP", + Name: egressIPName, + } + eIPC.recorder.Eventf(&eIPRef, corev1.EventTypeWarning, "CloudAssignmentRetry", + "egress IP: %s previously failed on deleted node %s (reason: %s), will retry assignment", + egressIP, cloudPrivateIPConfig.Status.Node, assignedCondition.Message) + if err := eIPC.kube.DeleteCloudPrivateIPConfig(cloudPrivateIPConfigName); err != nil { + return fmt.Errorf("failed to delete failed CloudPrivateIPConfig: %s, err: %v", cloudPrivateIPConfigName, err) + } + + return fmt.Errorf("deleted failed CloudPrivateIPConfig: %s, will retry creation in next reconciliation", 
cloudPrivateIPConfigName) + } else if err != nil { + klog.Errorf("Failed to check if node %s exists for CloudPrivateIPConfig %s: %v", cloudPrivateIPConfig.Status.Node, cloudPrivateIPConfigName, err) + } + } + if op.toAdd == cloudPrivateIPConfig.Spec.Node { klog.Infof("CloudPrivateIPConfig: %s already assigned to node: %s", cloudPrivateIPConfigName, cloudPrivateIPConfig.Spec.Node) continue @@ -1671,21 +1697,21 @@ func cloudPrivateIPConfigNameToIPString(name string) string { // removePendingOps removes the existing pending CloudPrivateIPConfig operations // from the cache and returns the EgressIP object which can be re-synced given // the new assignment possibilities. -func (eIPC *egressIPClusterController) removePendingOpsAndGetResyncs(egressIPName, egressIP string) ([]*egressipv1.EgressIP, error) { +func (eIPC *egressIPClusterController) removePendingOpsAndGetResyncs(egressIPName, egressIPAddr string) ([]*egressipv1.EgressIP, error) { eIPC.pendingCloudPrivateIPConfigsMutex.Lock() defer eIPC.pendingCloudPrivateIPConfigsMutex.Unlock() ops, pending := eIPC.pendingCloudPrivateIPConfigsOps[egressIPName] if !pending { return nil, fmt.Errorf("no pending operation found for EgressIP: %s", egressIPName) } - op, exists := ops[egressIP] + op, exists := ops[egressIPAddr] if !exists { - return nil, fmt.Errorf("pending operations found for EgressIP: %s, but not for the finalized IP: %s", egressIPName, egressIP) + return nil, fmt.Errorf("pending operations found for EgressIP: %s, but not for the finalized IP: %s", egressIPName, egressIPAddr) } // Make sure we are dealing with a delete operation, since for update // operations will still need to process the add afterwards. if op.toAdd == "" && op.toDelete != "" { - delete(ops, egressIP) + delete(ops, egressIPAddr) } if len(ops) == 0 { delete(eIPC.pendingCloudPrivateIPConfigsOps, egressIPName) @@ -1703,10 +1729,16 @@ func (eIPC *egressIPClusterController) removePendingOpsAndGetResyncs(egressIPNam resyncs := make([]*egressipv1.EgressIP, 0, len(egressIPs)) for _, egressIP := range egressIPs { egressIP := *egressIP - // Do not process the egress IP object which owns the - // CloudPrivateIPConfig for which we are currently processing the - // deletion for. 
if egressIP.Name == egressIPName { + for _, specIP := range egressIP.Spec.EgressIPs { + // Do not process the egress IP object which owns the + // CloudPrivateIPConfig for which we are currently processing the + // deletion for unless it still has the IP in it's spec + if specIP == egressIPAddr { + resyncs = append(resyncs, &egressIP) + break + } + } continue } unassigned := len(egressIP.Spec.EgressIPs) - len(egressIP.Status.Items) diff --git a/go-controller/pkg/clustermanager/network_cluster_controller.go b/go-controller/pkg/clustermanager/network_cluster_controller.go index 4391fc7e32..870345a6f4 100644 --- a/go-controller/pkg/clustermanager/network_cluster_controller.go +++ b/go-controller/pkg/clustermanager/network_cluster_controller.go @@ -244,7 +244,6 @@ func (ncc *networkClusterController) init() error { var podAllocOpts []annotationalloc.AllocatorOption if util.IsPreconfiguredUDNAddressesEnabled() && ncc.IsPrimaryNetwork() && - persistentIPsEnabled && ncc.TopologyType() == types.Layer2Topology { podAllocOpts = append(podAllocOpts, annotationalloc.WithMACRegistry(mac.NewManager())) } diff --git a/go-controller/pkg/clustermanager/networkconnect/cluster_network_connect.go b/go-controller/pkg/clustermanager/networkconnect/cluster_network_connect.go new file mode 100644 index 0000000000..3b94bd72b6 --- /dev/null +++ b/go-controller/pkg/clustermanager/networkconnect/cluster_network_connect.go @@ -0,0 +1,353 @@ +package networkconnect + +import ( + "errors" + "fmt" + "net" + "time" + + nadlisters "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/listers/k8s.cni.cncf.io/v1" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + kerrors "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/client-go/tools/cache" + "k8s.io/klog/v2" + + networkconnectv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/clusternetworkconnect/v1" + apitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/networkmanager" + ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" +) + +var ( + errConfig = errors.New("configuration error") +) + +// getPrimaryNADForNamespace returns the primary NAD key and network info for a namespace. +// This is used when processing namespaces that may have a primary UDN. +// Returns: +// - nadKey: the primary NAD key in "namespace/name" format (empty if namespace uses default network) +// - network: the network info for the primary network (nil if namespace uses default network) +// - err: error if failed to get/validate the network +// +// If the namespace uses the default network (no primary UDN), returns ("", nil, nil). +// Callers should check for empty nadKey to determine if namespace has a primary UDN. +func getPrimaryNADForNamespace(networkMgr networkmanager.Interface, namespaceName string, nadLister nadlisters.NetworkAttachmentDefinitionLister) (nadKey string, network util.NetInfo, err error) { + namespacePrimaryNetwork, err := networkMgr.GetActiveNetworkForNamespace(namespaceName) + if err != nil { + if util.IsInvalidPrimaryNetworkError(err) || util.IsUnprocessedActiveNetworkError(err) { + // We intentionally ignore the unprocessed active network error because + // UDN Controller hasn't created the NAD yet, OR NAD doesn't exist in a + // namespace that has the required UDN label. 
It could also be that the + // UDN was deleted and the NAD is also gone. + return "", nil, nil + } + return "", nil, err + } + if namespacePrimaryNetwork.IsDefault() { + // No primary UDN in this namespace + return "", nil, nil + } + // Get the NAD key for the primary network in this namespace. + // Since this is for namespace-scoped UDNs, we expect exactly one NAD per network. + // Today we don't support multiple primary NADs for a namespace, so this is safe. + // Also note if the user misconfigures and ends up with CUDN and UDN for the same namespace, + // and if the CUDN was created first - which means the UDN won't be created successfully, + // then the user uses the P-UDN selector, the CUDN's NAD will be chosen here for this selector + // but that's a design flaw in the user's configuration, and expectation is for users to use + // the selectors correctly. + primaryNADs := namespacePrimaryNetwork.GetNADs() + if len(primaryNADs) != 1 { + return "", nil, fmt.Errorf("expected exactly one primary NAD for namespace %s, got %d", namespaceName, len(primaryNADs)) + } + // There is a race condition where NAD is already deleted from kapi + // but network manager is too slow to update the network manager cache. + // In this case, GetNADs() will return the NADs even though they are deleted. + // So let's fetch the NAD again from the kapi to double confirm it exists + // before returning it. + nadNamespace, nadName, err := cache.SplitMetaNamespaceKey(primaryNADs[0]) + if err != nil { + return "", nil, fmt.Errorf("failed to split NAD key %s: %w", primaryNADs[0], err) + } + _, err = nadLister.NetworkAttachmentDefinitions(nadNamespace).Get(nadName) + if err != nil { + if apierrors.IsNotFound(err) { + klog.Warningf("NAD %s not found in kapi, returning empty network info even if network manager cache says it exists", primaryNADs[0]) + return "", nil, nil + } + return "", nil, err + } + // GetNADs() returns NADs in "namespace/name" format, so use directly + return primaryNADs[0], namespacePrimaryNetwork, nil +} + +func (c *Controller) reconcileClusterNetworkConnect(key string) error { + c.Lock() + defer c.Unlock() + startTime := time.Now() + _, cncName, err := cache.SplitMetaNamespaceKey(key) + if err != nil { + return fmt.Errorf("failed to split CNC key %s: %w", key, err) + } + klog.V(5).Infof("reconcileClusterNetworkConnect %s", cncName) + defer func() { + klog.Infof("reconcileClusterNetworkConnect %s took %v", cncName, time.Since(startTime)) + }() + cnc, err := c.cncLister.Get(cncName) + if err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("failed to get CNC %s: %w", cncName, err) + } + cncState, cncExists := c.cncCache[cncName] + if cnc == nil { + // CNC is being deleted, clean up resources + // Clean up the cache + // Note: allocator cleanup is not needed - it will be garbage collected + // when the cache entry is deleted below since it's self-contained per-CNC + // Annotations also don't need to be removed since object is already deleted. 
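+ // The tunnel key, however, must be released explicitly: the tunnel keys allocator is shared + // across CNCs, so releasing the key here lets a future CNC reuse it.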
+ if cncExists { + // Release tunnel key + c.tunnelKeysAllocator.ReleaseKeys(cncName) + klog.V(4).Infof("Released tunnel key for deleted CNC %s", cncName) + } + + // Clean up the cache + delete(c.cncCache, cncName) + klog.V(4).Infof("Cleaned up cache for deleted CNC %s", cncName) + return nil + } + // If CNC state doesn't exist yet (i.e., this is a CNC creation), create an entry in the cache + if !cncExists { + cncState = &clusterNetworkConnectState{ + name: cnc.Name, + selectedNADs: sets.New[string](), + selectedNetworks: sets.New[string](), + } + connectSubnetAllocator, err := NewHybridConnectSubnetAllocator(cnc.Spec.ConnectSubnets, cncName) + if err != nil { + return fmt.Errorf("failed to initialize subnet allocator for CNC %s: %w", cncName, err) + } + cncState.allocator = connectSubnetAllocator + klog.V(5).Infof("Initialized subnet allocator for CNC %s", cncName) + c.cncCache[cnc.Name] = cncState + } + // STEP1: Validate the CNC + // STEP2: Generate a tunnelID for the connect router corresponding to this CNC + // passing a value greater than 4096 as networkID - actually we don't need this value, + // but it's required by the allocator to ensure that the deterministic tunnel keys + // that are derived from the networkID are not reused for backwards compatibility reasons. + // So we want to skip that range and use the next available tunnel key. + // do this only if the CNC is being created - it's a one-time allocation. + if cncState.tunnelID == 0 { // cncState will exist as it's created above + tunnelID, err := c.tunnelKeysAllocator.AllocateKeys(cnc.Name, 4096+1, 1) + if err != nil { + return fmt.Errorf("failed to allocate tunnel key for CNC %s: %w", cncName, err) + } + err = util.UpdateNetworkConnectRouterTunnelKeyAnnotation(cnc.Name, c.cncClient, tunnelID[0]) + if err != nil { + return fmt.Errorf("failed to update network connect router tunnel key annotation for CNC %s: %w", cncName, err) + } + cncState.tunnelID = tunnelID[0] + } + // STEP3: Discover the selected UDNs and CUDNs + // Discovery, allocation, and release continue on per-network errors, so healthy networks + // make progress. Errors are aggregated and returned at the end. + var errs []error + discoveredNetworks, allMatchingNADKeys, err := c.discoverSelectedNetworks(cnc) + if err != nil { + errs = append(errs, fmt.Errorf("failed to discover selected networks for CNC %s: %w", cncName, err)) + } + // STEP4: Generate or release subnets of size CNC.Spec.ConnectSubnets.NetworkPrefix for each layer3 network + // and /31 or /127 subnets for each layer2 network + // We intentionally don't compute or use the networksNeedingAllocation set here because we want to return all + // currently allocated subnets for each owner back to the annotation update step. + allocatedSubnets, allMatchingNetworkKeys, err := c.allocateSubnets(discoveredNetworks, cncState.allocator) + if err != nil { + errs = append(errs, fmt.Errorf("failed to allocate subnets for CNC %s: %w", cncName, err)) + } + // This step handles the release of subnets for networks that are no longer matched or are deleted. + // NOTE: Since allMatchingNetworkKeys might not have the network or NAD which had a transient error + // (a rare event such as an informer list/get or NAD parse failing for a NAD update event), it's possible + // we end up releasing and re-allocating subnets for networks that had a transient error. But that risk is + // acceptable in favor of not blocking the setup of other healthy networks.
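+ // networksNeedingRelease = networks selected previously (from the cache) that no longer match; + // their connect subnets are handed back to this CNC's allocator below.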
+ networksNeedingRelease := cncState.selectedNetworks.Difference(allMatchingNetworkKeys) + if len(networksNeedingRelease) > 0 { + err = c.releaseSubnets(networksNeedingRelease, cncState.allocator) + if err != nil { + errs = append(errs, fmt.Errorf("failed to release subnets for CNC %s: %w", cncName, err)) + } + } + networksNeedingAllocation := allMatchingNetworkKeys.Difference(cncState.selectedNetworks) + klog.V(5).Infof("CNC %s: selectedNetworks=%v, allMatchingNetworkKeys=%v, networksNeedingAllocation=%v, networksNeedingRelease=%v", + cncName, + cncState.selectedNetworks.UnsortedList(), allMatchingNetworkKeys.UnsortedList(), networksNeedingAllocation.UnsortedList(), + networksNeedingRelease.UnsortedList()) + // we need to update the annotation only if there are networks that are newly matched or newly released + if len(networksNeedingAllocation) > 0 || len(networksNeedingRelease) > 0 { + err = util.UpdateNetworkConnectSubnetAnnotation(cnc, c.cncClient, allocatedSubnets) + if err != nil { + return fmt.Errorf("failed to update network connect subnet annotation for CNC %s: %w", cncName, err) + } + } + // plumbing is now done, update the cache with latest + cncState.selectedNADs = allMatchingNADKeys + klog.V(5).Infof("Updated selectedNADs cache for CNC %s with %d NADs", cncName, allMatchingNADKeys.Len()) + cncState.selectedNetworks = allMatchingNetworkKeys + klog.V(5).Infof("Updated selectedNetworks cache for CNC %s with %d networks", cncName, allMatchingNetworkKeys.Len()) + return kerrors.NewAggregate(errs) +} + +func (c *Controller) discoverSelectedNetworks(cnc *networkconnectv1.ClusterNetworkConnect) ([]util.NetInfo, sets.Set[string], error) { + discoveredNetworks := []util.NetInfo{} + allMatchingNADKeys := sets.New[string]() + var errs []error + + for _, selector := range cnc.Spec.NetworkSelectors { + switch selector.NetworkSelectionType { + case apitypes.ClusterUserDefinedNetworks: + networkSelector, err := metav1.LabelSelectorAsSelector(&selector.ClusterUserDefinedNetworkSelector.NetworkSelector) + if err != nil { + errs = append(errs, fmt.Errorf("failed to parse CUDN network selector: %w", err)) + continue + } + nads, err := c.nadLister.List(networkSelector) + if err != nil { + errs = append(errs, fmt.Errorf("failed to list NADs for CUDN selector: %w", err)) + continue + } + for _, nad := range nads { + // check this NAD is controlled by a CUDN + controller := metav1.GetControllerOfNoCopy(nad) + isCUDN := controller != nil && controller.Kind == cudnGVK.Kind && controller.APIVersion == cudnGVK.GroupVersion().String() + if !isCUDN { + continue + } + network, err := util.ParseNADInfo(nad) + if err != nil { + errs = append(errs, fmt.Errorf("failed to parse NAD %s/%s: %w", nad.Namespace, nad.Name, err)) + continue + } + if !network.IsPrimaryNetwork() { + continue + } + // This NAD passed all validation checks, so it's selected by this CNC + nadKey := nad.Namespace + "/" + nad.Name + allMatchingNADKeys.Insert(nadKey) + discoveredNetworks = append(discoveredNetworks, network) + } + case apitypes.PrimaryUserDefinedNetworks: + namespaceSelector, err := metav1.LabelSelectorAsSelector(&selector.PrimaryUserDefinedNetworkSelector.NamespaceSelector) + if err != nil { + errs = append(errs, fmt.Errorf("failed to parse PUDN namespace selector: %w", err)) + continue + } + namespaces, err := c.namespaceLister.List(namespaceSelector) + if err != nil { + errs = append(errs, fmt.Errorf("failed to list namespaces for PUDN selector: %w", err)) + continue + } + for _, ns := range namespaces { + nadKey, 
namespacePrimaryNetwork, err := getPrimaryNADForNamespace(c.networkManager, ns.Name, c.nadLister) + if err != nil { + errs = append(errs, fmt.Errorf("failed to get active network for namespace %s: %w", ns.Name, err)) + continue + } + if nadKey == "" { + // Namespace uses default network (no primary UDN) or UDN was deleted + continue + } + allMatchingNADKeys.Insert(nadKey) + discoveredNetworks = append(discoveredNetworks, namespacePrimaryNetwork) + } + default: + errs = append(errs, fmt.Errorf("%w: unsupported network selection type %s", errConfig, selector.NetworkSelectionType)) + } + } + + return discoveredNetworks, allMatchingNADKeys, kerrors.NewAggregate(errs) +} + +func computeNetworkOwner(networkType string, networkID int) string { + return fmt.Sprintf("%s_%d", networkType, networkID) +} + +// parseNetworkOwnerTopology extracts the topology type from an owner key. +// Owner keys are formatted as "{topology}_{networkID}" (e.g., "layer3_1", "layer2_2"). +func parseNetworkOwnerTopology(owner string) (topologyType string, ok bool) { + if len(owner) > len(ovntypes.Layer3Topology)+1 && owner[:len(ovntypes.Layer3Topology)] == ovntypes.Layer3Topology { + return ovntypes.Layer3Topology, true + } + if len(owner) > len(ovntypes.Layer2Topology)+1 && owner[:len(ovntypes.Layer2Topology)] == ovntypes.Layer2Topology { + return ovntypes.Layer2Topology, true + } + return "", false +} + +// allocateSubnets allocates subnets for the given discovered networks +// It returns a map of owner to subnets +// NOTE: If owner already had its subnets allocated, it will simply return those existing subnets +func (c *Controller) allocateSubnets(discoveredNetworks []util.NetInfo, allocator HybridConnectSubnetAllocator) (map[string][]*net.IPNet, sets.Set[string], error) { + var owner string + var subnets []*net.IPNet + var errs []error + allMatchingNetworkKeys := sets.New[string]() + allocatedSubnets := make(map[string][]*net.IPNet) + for _, network := range discoveredNetworks { + networkID := network.GetNetworkID() + if networkID == ovntypes.NoNetworkID { + errs = append(errs, fmt.Errorf("network id is invalid for network %s", network.GetNetworkName())) + continue + } + var err error + if network.TopologyType() == ovntypes.Layer3Topology { + owner = computeNetworkOwner(ovntypes.Layer3Topology, networkID) + subnets, err = allocator.AllocateLayer3Subnet(owner) + if err != nil { + errs = append(errs, fmt.Errorf("failed to allocate Layer3 subnet for network %s: %w", network.GetNetworkName(), err)) + continue + } + } else if network.TopologyType() == ovntypes.Layer2Topology { + owner = computeNetworkOwner(ovntypes.Layer2Topology, networkID) + subnets, err = allocator.AllocateLayer2Subnet(owner) + if err != nil { + errs = append(errs, fmt.Errorf("failed to allocate Layer2 subnet for network %s: %w", network.GetNetworkName(), err)) + continue + } + } else { + errs = append(errs, fmt.Errorf("unsupported network topology type %s for network %s", network.TopologyType(), network.GetNetworkName())) + continue + } + allocatedSubnets[owner] = subnets + allMatchingNetworkKeys.Insert(owner) + klog.V(5).Infof("Allocated subnets %v for %s (network: %s)", subnets, owner, network.GetNetworkName()) + } + return allocatedSubnets, allMatchingNetworkKeys, kerrors.NewAggregate(errs) +} + +// releaseSubnets releases subnets for the given network keys. +// Network keys encode topology type and network ID (e.g., "layer3_1", "layer2_2"), +// allowing subnet release without needing to re-discover network info. 
+func (c *Controller) releaseSubnets(networksNeedingRelease sets.Set[string], + allocator HybridConnectSubnetAllocator) error { + var errs []error + for networkKey := range networksNeedingRelease { + topologyType, ok := parseNetworkOwnerTopology(networkKey) + if !ok { + errs = append(errs, fmt.Errorf("invalid network key format: %s", networkKey)) + continue + } + switch topologyType { + case ovntypes.Layer3Topology: + allocator.ReleaseLayer3Subnet(networkKey) + case ovntypes.Layer2Topology: + allocator.ReleaseLayer2Subnet(networkKey) + default: + errs = append(errs, fmt.Errorf("unsupported network topology type %s for network %s", topologyType, networkKey)) + continue + } + klog.V(5).Infof("Released subnets for network %s", networkKey) + } + return kerrors.NewAggregate(errs) +} diff --git a/go-controller/pkg/clustermanager/networkconnect/controller.go b/go-controller/pkg/clustermanager/networkconnect/controller.go new file mode 100644 index 0000000000..df97a9f3a9 --- /dev/null +++ b/go-controller/pkg/clustermanager/networkconnect/controller.go @@ -0,0 +1,563 @@ +package networkconnect + +import ( + "fmt" + "reflect" + "sync" + "time" + + nadv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + nadclientset "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/clientset/versioned" + nadlisters "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/listers/k8s.cni.cncf.io/v1" + + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/util/sets" + corelisters "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/util/workqueue" + "k8s.io/klog/v2" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/id" + controllerutil "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/controller" + networkconnectv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/clusternetworkconnect/v1" + networkconnectclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/clusternetworkconnect/v1/apis/clientset/versioned" + networkconnectlisters "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/clusternetworkconnect/v1/apis/listers/clusternetworkconnect/v1" + apitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/types" + userdefinednetworkv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/networkmanager" + ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" +) + +var ( + cudnGVK = userdefinednetworkv1.SchemeGroupVersion.WithKind("ClusterUserDefinedNetwork") + udnGVK = userdefinednetworkv1.SchemeGroupVersion.WithKind("UserDefinedNetwork") +) + +// clusterNetworkConnectState is the cache that keeps the state of a single +// cluster network connect in the cluster with name being unique +type clusterNetworkConnectState struct { + // name of the cluster network connect (unique across cluster) + name string + // allocator for this CNC's subnet allocation + allocator HybridConnectSubnetAllocator + // map of NADs currently selected by this CNC's network selectors + // {value: NAD namespace/name key} + // this cache is mainly required to be able to detect when a + // NAD is started or 
stopped matching the CNC. That way, we don't + // need to call reconcile on all CNCs and only the specific ones + // that select this NAD. + // Specially, when a NAD is deleted, we need to release the subnets allocated for + // matching CNCs and since nadKey is the only information we get + // since NAD object itself will be nil since its deleted, we need + // to keep track of NAD keys. + selectedNADs sets.Set[string] + // set of networks currently selected by this CNC's network selectors + // {value: network owner key like "layer3_1" or "layer2_2"} + // Owner keys are computed from topology type (layer3 or layer2) and network ID, enabling subnet release + // without needing to re-discover network info. + selectedNetworks sets.Set[string] + // tunnelID for this CNC's connect router + tunnelID int +} + +type Controller struct { + // wf is the watch factory for accessing informers + wf *factory.WatchFactory + // listers + cncLister networkconnectlisters.ClusterNetworkConnectLister + namespaceLister corelisters.NamespaceLister + nadLister nadlisters.NetworkAttachmentDefinitionLister + //clientset + cncClient networkconnectclientset.Interface + nadClient nadclientset.Interface + // Controller for managing cluster-network-connect events + cncController controllerutil.Controller + // Controller for managing NetworkAttachmentDefinition events + nadController controllerutil.Controller + // Controller for managing Namespace events + namespaceController controllerutil.Controller + networkManager networkmanager.Interface + + // Single global lock protecting all controller state + // We can improve this later by using a more fine-grained lock based on performance testing + sync.RWMutex + // holds the state for each CNC keyed by CNC name + cncCache map[string]*clusterNetworkConnectState + tunnelKeysAllocator *id.TunnelKeysAllocator +} + +func NewController( + wf *factory.WatchFactory, + ovnClient *util.OVNClusterManagerClientset, + networkManager networkmanager.Interface, + tunnelKeysAllocator *id.TunnelKeysAllocator, +) *Controller { + cncLister := wf.ClusterNetworkConnectInformer().Lister() + nadLister := wf.NADInformer().Lister() + namespaceLister := wf.NamespaceInformer().Lister() + c := &Controller{ + wf: wf, + cncClient: ovnClient.NetworkConnectClient, + nadClient: ovnClient.NetworkAttchDefClient, + cncLister: cncLister, + nadLister: nadLister, + namespaceLister: namespaceLister, + networkManager: networkManager, + cncCache: make(map[string]*clusterNetworkConnectState), + tunnelKeysAllocator: tunnelKeysAllocator, + } + + cncCfg := &controllerutil.ControllerConfig[networkconnectv1.ClusterNetworkConnect]{ + RateLimiter: workqueue.DefaultTypedControllerRateLimiter[string](), + Informer: wf.ClusterNetworkConnectInformer().Informer(), + Lister: cncLister.List, + Reconcile: c.reconcileClusterNetworkConnect, + ObjNeedsUpdate: cncNeedsUpdate, + Threadiness: 1, + } + c.cncController = controllerutil.NewController( + "clustermanager-network-connect-controller", + cncCfg, + ) + + nadCfg := &controllerutil.ControllerConfig[nadv1.NetworkAttachmentDefinition]{ + RateLimiter: workqueue.DefaultTypedControllerRateLimiter[string](), + Informer: wf.NADInformer().Informer(), + Lister: nadLister.List, + Reconcile: c.reconcileNAD, + ObjNeedsUpdate: nadNeedsUpdate, + Threadiness: 1, + } + c.nadController = controllerutil.NewController( + "clustermanager-network-connect-network-attachment-definition-controller", + nadCfg, + ) + + namespaceCfg := &controllerutil.ControllerConfig[corev1.Namespace]{ + RateLimiter: 
workqueue.DefaultTypedControllerRateLimiter[string](), + Informer: wf.NamespaceInformer().Informer(), + Lister: namespaceLister.List, + Reconcile: c.reconcileNamespace, + ObjNeedsUpdate: namespaceNeedsUpdate, + Threadiness: 1, + } + c.namespaceController = controllerutil.NewController( + "clustermanager-network-connect-namespace-controller", + namespaceCfg, + ) + + return c +} + +func (c *Controller) Start() error { + defer klog.Infof("Cluster manager network connect controllers started") + return controllerutil.StartWithInitialSync( + c.initialSync, + c.cncController, + c.nadController, + c.namespaceController, + ) +} + +// initialSync restores allocator state from existing CNC annotations at startup. +// This is called after informers are synced but before workers start processing. +// It ensures that subnets already allocated (stored in annotations) are not re-allocated. +func (c *Controller) initialSync() error { + c.Lock() + defer c.Unlock() + + cncs, err := c.cncLister.List(labels.Everything()) + if err != nil { + return fmt.Errorf("failed to list CNCs during initial sync: %v", err) + } + + for _, cnc := range cncs { + // Parse existing subnet annotation + allocatedSubnets, err := util.ParseNetworkConnectSubnetAnnotation(cnc) + if err != nil { + klog.Warningf("Failed to parse subnet annotation for CNC %s: %v, skipping", cnc.Name, err) + continue + } + + // Parse existing tunnel key annotation + tunnelID, err := util.ParseNetworkConnectTunnelKeyAnnotation(cnc) + if err != nil { + klog.Warningf("Failed to parse tunnel key annotation for CNC %s: %v, skipping", cnc.Name, err) + continue + } + + // Initialize CNC state in cache + cncState := &clusterNetworkConnectState{ + name: cnc.Name, + // NOTE: We intentionally don't restore selectedNADs as its not strictly needed. + // Why this is okay: + // selectedNADs tracks NAD keys (e.g., "namespace/name") which aren't stored in + // the annotation - the annotation only has owner keys. + // During the first CNC reconcile (which happens right after initialSync since the + // Add events are queued), reconcileClusterNetworkConnect runs discoverSelectedNetworks which: + // Iterates through NADs matching the selectors + // Returns allMatchingNADKeys + // Then updates the cache: cncState.selectedNADs = allMatchingNADKeys + // The ordering is safe because StartWithInitialSync ensures: + // Informer caches are synced (all NADs visible) + // initialSync runs (allocator state restored) + // THEN workers start processing the queue (CNC reconciles happen) + // Edge case: If a NAD update comes in during this window, mustProcessCNCForNAD might + // see wasSelected=false (empty cache) and isSelected=true → trigger an extra reconcile. + // But that's benign - just an extra no-op reconcile. + selectedNADs: sets.New[string](), + selectedNetworks: sets.New[string](), + tunnelID: tunnelID, + } + connectSubnetAllocator, err := NewHybridConnectSubnetAllocator(cnc.Spec.ConnectSubnets, cnc.Name) + if err != nil { + return fmt.Errorf("failed to initialize subnet allocator for CNC %s: %w", cnc.Name, err) + } + cncState.allocator = connectSubnetAllocator + c.cncCache[cnc.Name] = cncState + + // Restore tunnel key in allocator if present + if tunnelID > 0 { + // Reserve tunnel key in the allocator so it won't be re-allocated + // We already reserve the key from cluster manager sync in initTunnelKeysAllocator, + // but no harm in doing it again here for completeness. 
+ if err := c.tunnelKeysAllocator.ReserveKeys(cnc.Name, []int{tunnelID}); err != nil { + klog.Warningf("Failed to restore tunnel key %d for CNC %s: %v", tunnelID, cnc.Name, err) + } else { + klog.V(4).Infof("Restored tunnel key %d for CNC %s", tunnelID, cnc.Name) + } + } + + // Restore subnets if present + if len(allocatedSubnets) > 0 { + if err := cncState.allocator.MarkAllocatedSubnets(allocatedSubnets); err != nil { + klog.Warningf("Failed to restore subnets for CNC %s: %v", cnc.Name, err) + continue + } + + // Populate selectedNetworks from the restored allocations + for owner := range allocatedSubnets { + cncState.selectedNetworks.Insert(owner) + } + klog.V(4).Infof("Restored %d subnet allocations for CNC %s", len(allocatedSubnets), cnc.Name) + } + } + + klog.Infof("Initial sync completed: restored state for %d CNCs", len(cncs)) + return nil +} + +func (c *Controller) Stop() { + controllerutil.Stop( + c.cncController, + c.nadController, + c.namespaceController, + ) + klog.Infof("Cluster manager network connect controllers stopped") +} + +func cncNeedsUpdate(oldObj, newObj *networkconnectv1.ClusterNetworkConnect) bool { + // Case 1: CNC is being deleted + // Case 2: CNC is being created + if oldObj == nil || newObj == nil { + return true + } + // Case 3: CNC is being updated + // Only trigger updates when the Spec.NetworkSelectors changes + // We only need to check for selector changes + // and don't need to react on connectivity enabled field changes + // from cluster manager. + // connectSubnet is immutable so that can't change after creation. + return !reflect.DeepEqual(oldObj.Spec.NetworkSelectors, newObj.Spec.NetworkSelectors) +} + +func nadNeedsUpdate(oldObj, newObj *nadv1.NetworkAttachmentDefinition) bool { + nadSupported := func(nad *nadv1.NetworkAttachmentDefinition) bool { + if nad == nil { + return false + } + // we don't support direct NADs anymore. 
CNC is only supported for CUDNs and UDNs + controller := metav1.GetControllerOfNoCopy(nad) + isCUDN := controller != nil && controller.Kind == cudnGVK.Kind && controller.APIVersion == cudnGVK.GroupVersion().String() + isUDN := controller != nil && controller.Kind == udnGVK.Kind && controller.APIVersion == udnGVK.GroupVersion().String() + if !isCUDN && !isUDN { + return false + } + network, err := util.ParseNADInfo(nad) + if err != nil { + // cannot parse NAD info, so treat this as an unsupported NAD + return false + } + if network.IsPrimaryNetwork() { + // only layer3 and layer2 topologies are supported, + // but since a primary network is always layer3 or layer2, + // we can skip checking the topology here + return true + } + return false // we don't support secondary networks, so we can ignore it + } + // ignore if we don't support this NAD + if !nadSupported(oldObj) && !nadSupported(newObj) { + return false + } + // CASE1: NAD is being deleted (UDN or CUDN is being deleted) + // CASE2: NAD is being created (UDN or CUDN is being created) + if oldObj == nil || newObj == nil { + return true + } + oldNADLabels := labels.Set(oldObj.Labels) + newNADLabels := labels.Set(newObj.Labels) + labelsChanged := !labels.Equals(oldNADLabels, newNADLabels) + // spot check for the ovn network id annotation being added to the NAD as an update + annotationsChanged := oldObj.Annotations[ovntypes.OvnNetworkIDAnnotation] != newObj.Annotations[ovntypes.OvnNetworkIDAnnotation] + + // CASE3: NAD is being updated (UDN or CUDN is being updated) + // 3.1: NAD network id annotation changed + // 3.2: NAD labels changed (only relevant for CUDNs->NADs) + return labelsChanged || annotationsChanged +} + +func (c *Controller) reconcileNAD(key string) error { + // Use single global lock following ANP controller pattern + c.Lock() + defer c.Unlock() + + startTime := time.Now() + namespace, name, err := cache.SplitMetaNamespaceKey(key) + if err != nil { + return fmt.Errorf("failed to split NAD key %s: %w", key, err) + } + + klog.V(5).Infof("reconcileNAD %s", key) + defer func() { + klog.Infof("reconcileNAD %s took %v", key, time.Since(startTime)) + }() + + nad, err := c.nadLister.NetworkAttachmentDefinitions(namespace).Get(name) + if err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("failed to get NAD %s: %w", key, err) + } + + existingCNCs, err := c.cncLister.List(labels.Everything()) + if err != nil { + return fmt.Errorf("failed to list CNCs: %w", err) + } + + // Process each CNC to check if this NAD's matching state changed + for _, cnc := range existingCNCs { + if c.mustProcessCNCForNAD(nad, cnc, key) { + c.cncController.Reconcile(cnc.Name) + } + } + return nil +} + +// mustProcessCNCForNAD checks if: +// 1. the provided NAD previously matched the given CNC and now it stopped matching OR +// 2. the provided NAD currently matches the given CNC and previously it didn't match OR +// 3. the provided NAD previously matched the given CNC and now it continues to match it +// Returns true if any of the above conditions are true. +// This function is READ-ONLY and does not update the cache. +// NOTE: Caller must hold the global lock.
+func (c *Controller) mustProcessCNCForNAD(nad *nadv1.NetworkAttachmentDefinition, cnc *networkconnectv1.ClusterNetworkConnect, nadKey string) bool { + cncState, cncExists := c.cncCache[cnc.Name] + + // If CNC state doesn't exist yet, we don't know the previous state + // so we assume no change (cache will be populated during CNC reconciliation) + if !cncExists { + klog.V(5).Infof("CNC %s state not found in cache, assuming no matching state change for NAD %s", cnc.Name, nadKey) + return false + } + + // Check if NAD used to be selected (using cache) + wasSelected := cncState.selectedNADs.Has(nadKey) + + // Determine if NAD started to be selected now + isSelected := false + if nad != nil { + nadLabels := labels.Set(nad.Labels) + selectorLoop: // break out of the loop if we find a match + for _, networkSelector := range cnc.Spec.NetworkSelectors { + switch networkSelector.NetworkSelectionType { + case apitypes.ClusterUserDefinedNetworks: + cudnSelector, err := metav1.LabelSelectorAsSelector(&networkSelector.ClusterUserDefinedNetworkSelector.NetworkSelector) + if err != nil { + klog.Errorf("Failed to create selector for CNC %s: %v", cnc.Name, err) + continue + } + // labels on CUDN are copied to the corresponding NADs, so we can use the same selector + if cudnSelector.Matches(nadLabels) { + isSelected = true + break selectorLoop + } + case apitypes.PrimaryUserDefinedNetworks: + namespaceSelector, err := metav1.LabelSelectorAsSelector(&networkSelector.PrimaryUserDefinedNetworkSelector.NamespaceSelector) + if err != nil { + klog.Errorf("Failed to create selector for CNC %s: %v", cnc.Name, err) + continue + } + namespaces, err := c.namespaceLister.List(namespaceSelector) + if err != nil { + klog.Errorf("Failed to list namespaces for CNC %s: %v", cnc.Name, err) + continue + } + for _, namespace := range namespaces { + primaryNAD, err := c.networkManager.GetActiveNetworkForNamespace(namespace.Name) + if err != nil { + if util.IsUnprocessedActiveNetworkError(err) || util.IsInvalidPrimaryNetworkError(err) { + continue + } + klog.Errorf("Failed to get active network for namespace %s: %v", namespace.Name, err) + continue + } + if primaryNAD.HasNAD(nadKey) { + isSelected = true + break selectorLoop + } + } + default: + klog.Errorf("Unsupported network selection type %s for CNC %s", networkSelector.NetworkSelectionType, cnc.Name) + continue + } + } + } + + // Log state changes + stateChanged := wasSelected != isSelected + if stateChanged { + if isSelected && !wasSelected { + klog.V(4).Infof("NAD %s started to match CNC %s, requeuing...", nadKey, cnc.Name) + } else if !isSelected && wasSelected { + klog.V(4).Infof("NAD %s used to match CNC %s, requeuing...", nadKey, cnc.Name) + } + } + + // reason we need to also process if the NAD simply continues to match is because + // NAD could have had its network-id annotation update which we use in CNC reconciliation to + // generate the subnet for the connect router corresponding to this CNC. 
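+ // Requeue the CNC whenever the NAD matched before, matches now, or both.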
+ return wasSelected || isSelected +} + +func namespaceNeedsUpdate(oldObj, newObj *corev1.Namespace) bool { + // Case 1: namespace is being deleted + // Case 2: namespace is being created + // for both these cases, we don't care because + // we only care about UDNs being created or deleted + // in those namespaces which is already handled by the NAD controller + if oldObj == nil || newObj == nil { + return false + } + namespaceSupported := func(namespace *corev1.Namespace) bool { + if namespace == nil { + return false + } + // we only support primary UDNs in namespaces that have the required label + _, ok := namespace.Labels[ovntypes.RequiredUDNNamespaceLabel] + return ok + } + if !namespaceSupported(oldObj) && !namespaceSupported(newObj) { + return false + } + // Case 3: namespace is being updated (we only care about labels changes) + oldNamespaceLabels := labels.Set(oldObj.Labels) + newNamespaceLabels := labels.Set(newObj.Labels) + labelsChanged := !labels.Equals(oldNamespaceLabels, newNamespaceLabels) + return labelsChanged +} + +func (c *Controller) reconcileNamespace(key string) error { + c.Lock() + defer c.Unlock() + + startTime := time.Now() + klog.V(5).Infof("reconcileNamespace %s", key) + defer func() { + klog.Infof("reconcileNamespace %s took %v", key, time.Since(startTime)) + }() + + namespace, err := c.namespaceLister.Get(key) + if err != nil { + if apierrors.IsNotFound(err) { + // Namespace deleted - nothing to do since NAD controller + // will handle any NAD deletions in this namespace + // which will trigger a CNC reconcile for any CNCs selecting + // this namespace. + return nil + } + return fmt.Errorf("failed to get namespace %s: %w", key, err) + } + + primaryNAD, _, err := getPrimaryNADForNamespace(c.networkManager, key, c.nadLister) + if err != nil { + klog.Errorf("Failed to get primary NAD for namespace %s: %v", key, err) + // best effort, usually if a NAD then gets created/deleted in this namespace, + // we will get a NAD event anyways + return nil + } + if primaryNAD == "" { + // no primary UDN in this namespace, so we don't need to do anything + return nil + } + + existingCNCs, err := c.cncLister.List(labels.Everything()) + if err != nil { + return fmt.Errorf("failed to list CNCs: %w", err) + } + for _, cnc := range existingCNCs { + if c.mustProcessCNCForNamespace(cnc, namespace, primaryNAD) { + c.cncController.Reconcile(cnc.Name) + } + } + return nil +} + +// mustProcessCNCForNamespace determines if: +// 1. the given namespace was previously selected by the given CNC and now it stopped matching OR +// 2. 
if its currently selected by the given CNC and previously it didn't match +// returns true if either of the above conditions are true +func (c *Controller) mustProcessCNCForNamespace(cnc *networkconnectv1.ClusterNetworkConnect, namespace *corev1.Namespace, primaryNAD string) bool { + cncState, cncExists := c.cncCache[cnc.Name] + + // If CNC state doesn't exist yet, we don't know the previous state + // so we assume no change (cache will be populated during CNC reconciliation) + if !cncExists { + klog.V(5).Infof("CNC %s state not found in cache, assuming no matching state change for namespace %s", cnc.Name, namespace.Name) + return false + } + wasSelected := cncState.selectedNADs.Has(primaryNAD) + isSelected := false + +selectorLoop: + for _, networkSelector := range cnc.Spec.NetworkSelectors { + switch networkSelector.NetworkSelectionType { + case apitypes.PrimaryUserDefinedNetworks: + namespaceSelector, err := metav1.LabelSelectorAsSelector( + &networkSelector.PrimaryUserDefinedNetworkSelector.NamespaceSelector) + if err != nil { + klog.Errorf("Failed to create selector for CNC %s: %v", cnc.Name, err) + continue + } + if namespaceSelector.Matches(labels.Set(namespace.Labels)) { + isSelected = true + break selectorLoop + } + } + } + stateChanged := wasSelected != isSelected + if stateChanged { + if isSelected && !wasSelected { + klog.V(4).Infof("Namespace %s started to match CNC %s, requeuing...", namespace.Name, cnc.Name) + } else if !isSelected && wasSelected { + klog.V(4).Infof("Namespace %s used to match CNC %s, requeuing...", namespace.Name, cnc.Name) + } + } + // If state didn't change, that is if this namespace was previously selected + // and continues to be selected, it means it was some other label updates to + // namespace that we don't care about. State changes are the only ones we care about. 
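+ // Requeue only when the selection state flipped; other namespace label churn is ignored.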
+ return stateChanged +} diff --git a/go-controller/pkg/clustermanager/networkconnect/controller_components_test.go b/go-controller/pkg/clustermanager/networkconnect/controller_components_test.go new file mode 100644 index 0000000000..6e5d051466 --- /dev/null +++ b/go-controller/pkg/clustermanager/networkconnect/controller_components_test.go @@ -0,0 +1,2849 @@ +package networkconnect + +import ( + "context" + "encoding/json" + "fmt" + "net" + "sync" + "testing" + "time" + + cnitypes "github.com/containernetworking/cni/pkg/types" + nadv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + "github.com/onsi/gomega" + "github.com/onsi/gomega/format" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/util/workqueue" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/id" + ovncnitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + controllerutil "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/controller" + networkconnectv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/clusternetworkconnect/v1" + networkconnectfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/clusternetworkconnect/v1/apis/clientset/versioned/fake" + apitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/types" + userdefinednetworkv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/networkmanager" + ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" +) + +// NOTE: This file tests the elements of the networkconnect controller +// in a modular fashion. It is not a comprehensive test of the full controller. +// It focuses on testing the individual functions of the controller. 
+ +// makeCUDNOwnerRef creates an owner reference for a ClusterUserDefinedNetwork +func makeCUDNOwnerRef(name string) metav1.OwnerReference { + return metav1.OwnerReference{ + APIVersion: userdefinednetworkv1.SchemeGroupVersion.String(), + Kind: "ClusterUserDefinedNetwork", + Name: name, + Controller: func() *bool { b := true; return &b }(), + } +} + +// makeUDNOwnerRef creates an owner reference for a UserDefinedNetwork +func makeUDNOwnerRef(name string) metav1.OwnerReference { + return metav1.OwnerReference{ + APIVersion: userdefinednetworkv1.SchemeGroupVersion.String(), + Kind: "UserDefinedNetwork", + Name: name, + Controller: func() *bool { b := true; return &b }(), + } +} + +// testCNC is a helper to build ClusterNetworkConnect objects for testing +type testCNC struct { + Name string + NetworkSelectors []apitypes.NetworkSelector + ConnectSubnets []networkconnectv1.ConnectSubnet + Connectivity []networkconnectv1.ConnectivityType +} + +func (tc testCNC) ClusterNetworkConnect() *networkconnectv1.ClusterNetworkConnect { + cnc := &networkconnectv1.ClusterNetworkConnect{ + ObjectMeta: metav1.ObjectMeta{ + Name: tc.Name, + }, + Spec: networkconnectv1.ClusterNetworkConnectSpec{ + NetworkSelectors: tc.NetworkSelectors, + ConnectSubnets: tc.ConnectSubnets, + Connectivity: tc.Connectivity, + }, + } + if len(tc.ConnectSubnets) == 0 { + cnc.Spec.ConnectSubnets = []networkconnectv1.ConnectSubnet{ + {CIDR: "192.168.0.0/16", NetworkPrefix: 24}, + {CIDR: "fd00:10:244::/112", NetworkPrefix: 120}, // matches ipv4 /24: 32-24=8, 128-8=120 + } + } + if len(tc.Connectivity) == 0 { + cnc.Spec.Connectivity = []networkconnectv1.ConnectivityType{ + networkconnectv1.PodNetwork, + } + } + return cnc +} + +// testNAD is a helper to build NetworkAttachmentDefinition objects for testing +type testNAD struct { + Name string + Namespace string + Network string + Labels map[string]string + Annotations map[string]string + // IsCUDN indicates if this NAD is owned by a ClusterUserDefinedNetwork + IsCUDN bool + // IsUDN indicates if this NAD is owned by a UserDefinedNetwork + IsUDN bool + // IsPrimary indicates if this is a primary network + IsPrimary bool + // Topology is the network topology (layer3 or layer2) + Topology string + // Subnet is the subnet CIDR for the network + Subnet string + // NetworkID is the OVN network ID annotation value + NetworkID string +} + +func (tn testNAD) NAD() *nadv1.NetworkAttachmentDefinition { + if tn.Annotations == nil { + tn.Annotations = map[string]string{} + } + tn.Annotations[types.OvnNetworkNameAnnotation] = tn.Network + if tn.NetworkID != "" { + tn.Annotations[types.OvnNetworkIDAnnotation] = tn.NetworkID + } + + nad := &nadv1.NetworkAttachmentDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: tn.Name, + Namespace: tn.Namespace, + Labels: tn.Labels, + Annotations: tn.Annotations, + }, + } + + // Set owner reference for CUDN + if tn.IsCUDN { + ownerRef := makeCUDNOwnerRef(tn.Network) + nad.ObjectMeta.OwnerReferences = []metav1.OwnerReference{ownerRef} + } + + // Set owner reference for UDN + if tn.IsUDN { + ownerRef := makeUDNOwnerRef(tn.Name) + nad.ObjectMeta.OwnerReferences = []metav1.OwnerReference{ownerRef} + } + + // Build NAD spec config + topology := tn.Topology + if topology == "" { + topology = types.Layer3Topology + } + role := "" + if tn.IsPrimary { + role = ", \"role\": \"primary\"" + } + subnet := "" + if tn.Subnet != "" { + subnet = fmt.Sprintf(", \"subnets\": \"%s\"", tn.Subnet) + } + + nad.Spec.Config = fmt.Sprintf( + "{\"cniVersion\": \"0.4.0\", \"name\": \"%s\", 
\"type\": \"%s\", \"topology\": \"%s\", \"netAttachDefName\": \"%s/%s\"%s%s}", + tn.Network, + config.CNI.Plugin, + topology, + tn.Namespace, + tn.Name, + role, + subnet, + ) + + return nad +} + +// testNamespace is a helper to build Namespace objects for testing +type testNamespace struct { + Name string + Labels map[string]string + // RequiresUDN indicates this namespace requires a UDN but doesn't have one yet. + // This simulates the error condition where GetActiveNetworkForNamespace returns an error. + RequiresUDN bool +} + +func (tn testNamespace) Namespace() *corev1.Namespace { + return &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: tn.Name, + Labels: tn.Labels, + }, + } +} + +func TestController_reconcileClusterNetworkConnect(t *testing.T) { + tests := []struct { + // name is the test case name + name string + // cnc is the ClusterNetworkConnect object to create and reconcile + cnc *testCNC + // nads is the list of NetworkAttachmentDefinitions to create. + // NADs with IsUDN=true and IsPrimary=true will auto-populate FakeNetworkManager.PrimaryNetworks. + nads []*testNAD + // namespaces is the list of Namespaces to create (used for Primary UDN selector tests). + // Namespaces with RequiresUDN=true but no matching UDN NAD will trigger GetActiveNetworkForNamespace error. + namespaces []*testNamespace + // reconcile is the CNC name to reconcile + reconcile string + // wantErr indicates if reconciliation should return an error + wantErr bool + // expectSelectedNADs is the list of NAD keys expected to be selected + expectSelectedNADs []string + // expectSelectedNetworks is the list of network names expected to be selected + expectSelectedNetworks []string + // expectTunnelIDAllocated indicates if a tunnel ID should be allocated + expectTunnelIDAllocated bool + // expectSubnetsAllocated indicates if subnets should be allocated + expectSubnetsAllocated bool + // expectCacheEntryExists indicates if a cache entry should exist after reconciliation + expectCacheEntryExists bool + // expectCacheEntryDeleted indicates if the cache entry should be deleted (for CNC deletion tests) + expectCacheEntryDeleted bool + }{ + // Primary CUDN owned NAD selection tests + { + name: "creates cache entry and allocates tunnel ID and subnets for new CNC with CUDN selector", + cnc: &testCNC{ + Name: "test-cnc", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"selected": "true"}, + }, + }, + }, + }, + }, + nads: []*testNAD{ + { + Name: "cudn-red", + Namespace: "red", + Network: util.GenerateCUDNNetworkName("red"), + IsCUDN: true, + IsPrimary: true, + Topology: types.Layer3Topology, + Subnet: "10.0.0.0/16", + Labels: map[string]string{"selected": "true"}, + NetworkID: "1", + }, + }, + reconcile: "test-cnc", + expectSelectedNADs: []string{"red/cudn-red"}, + expectSelectedNetworks: []string{"layer3_1"}, + expectTunnelIDAllocated: true, + expectSubnetsAllocated: true, + expectCacheEntryExists: true, + }, + { + name: "selects multiple CUDNs matching label selector", + cnc: &testCNC{ + Name: "test-cnc", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"env": "test"}, + }, + 
}, + }, + }, + }, + nads: []*testNAD{ + { + Name: "cudn-blue", + Namespace: "blue", + Network: util.GenerateCUDNNetworkName("blue"), + IsCUDN: true, + IsPrimary: true, + Topology: types.Layer3Topology, + Subnet: "10.1.0.0/16", + Labels: map[string]string{"env": "test"}, + NetworkID: "2", + }, + { + Name: "cudn-green", + Namespace: "green", + Network: util.GenerateCUDNNetworkName("green"), + IsCUDN: true, + IsPrimary: true, + Topology: types.Layer2Topology, + Subnet: "10.2.0.0/16", + Labels: map[string]string{"env": "test"}, + NetworkID: "3", + }, + { + Name: "cudn-yellow", + Namespace: "yellow", + Network: util.GenerateCUDNNetworkName("yellow"), + IsCUDN: true, + IsPrimary: true, + Topology: types.Layer3Topology, + Subnet: "10.3.0.0/16", + Labels: map[string]string{"env": "prod"}, // not selected + NetworkID: "4", + }, + }, + reconcile: "test-cnc", + expectSelectedNADs: []string{"blue/cudn-blue", "green/cudn-green"}, + expectSelectedNetworks: []string{"layer3_2", "layer2_3"}, + expectTunnelIDAllocated: true, + expectSubnetsAllocated: true, + expectCacheEntryExists: true, + }, + { + name: "ignores non-CUDN NADs even if labels match", + cnc: &testCNC{ + Name: "test-cnc", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"selected": "true"}, + }, + }, + }, + }, + }, + nads: []*testNAD{ + { + Name: "regular-nad", + Namespace: "test", + Network: "regular-network", + IsCUDN: false, // not a CUDN + IsPrimary: true, + Topology: types.Layer3Topology, + Labels: map[string]string{"selected": "true"}, + NetworkID: "5", + }, + }, + reconcile: "test-cnc", + expectSelectedNADs: []string{}, + expectSelectedNetworks: []string{}, + expectCacheEntryExists: true, + }, + { + name: "ignores secondary network NADs", + cnc: &testCNC{ + Name: "test-cnc", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"selected": "true"}, + }, + }, + }, + }, + }, + nads: []*testNAD{ + { + Name: "secondary-cudn", + Namespace: "test", + Network: util.GenerateCUDNNetworkName("secondary"), + IsCUDN: true, + IsPrimary: false, // secondary, not primary + Topology: types.Layer3Topology, + Labels: map[string]string{"selected": "true"}, + NetworkID: "6", + }, + }, + reconcile: "test-cnc", + expectSelectedNADs: []string{}, + expectSelectedNetworks: []string{}, + expectCacheEntryExists: true, + }, + { + name: "deletes cache entry when CNC is deleted", + cnc: nil, // CNC doesn't exist + reconcile: "deleted-cnc", + expectCacheEntryDeleted: true, + }, + { + name: "handles layer2 topology networks", + cnc: &testCNC{ + Name: "test-cnc", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"selected": "true"}, + }, + }, + }, + }, + }, + nads: []*testNAD{ + { + Name: "cudn-layer2", + Namespace: "layer2ns", + Network: util.GenerateCUDNNetworkName("layer2net"), + IsCUDN: true, + IsPrimary: true, + Topology: types.Layer2Topology, + Subnet: "10.5.0.0/16", + Labels: map[string]string{"selected": "true"}, + 
NetworkID: "7", + }, + }, + reconcile: "test-cnc", + expectSelectedNADs: []string{"layer2ns/cudn-layer2"}, + expectSelectedNetworks: []string{"layer2_7"}, + expectTunnelIDAllocated: true, + expectSubnetsAllocated: true, + expectCacheEntryExists: true, + }, + // Primary UDN selector tests + { + name: "selects primary UDN by namespace selector", + cnc: &testCNC{ + Name: "test-cnc", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"udn": "enabled"}, + }, + }, + }, + }, + }, + namespaces: []*testNamespace{ + {Name: "ns1", Labels: map[string]string{"udn": "enabled"}}, + {Name: "ns2", Labels: map[string]string{"udn": "disabled"}}, + }, + nads: []*testNAD{ + { + Name: "primary-udn", + Namespace: "ns1", + Network: "ns1-primary-udn", + IsUDN: true, + IsPrimary: true, + Topology: types.Layer3Topology, + Subnet: "10.10.0.0/16", + NetworkID: "10", + }, + }, + reconcile: "test-cnc", + expectSelectedNADs: []string{"ns1/primary-udn"}, + expectSelectedNetworks: []string{"layer3_10"}, + expectTunnelIDAllocated: true, + expectSubnetsAllocated: true, + expectCacheEntryExists: true, + }, + { + name: "selects multiple primary UDNs from multiple namespaces", + cnc: &testCNC{ + Name: "test-cnc", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"tier": "frontend"}, + }, + }, + }, + }, + }, + namespaces: []*testNamespace{ + {Name: "frontend-a", Labels: map[string]string{"tier": "frontend"}}, + {Name: "frontend-b", Labels: map[string]string{"tier": "frontend"}}, + {Name: "backend", Labels: map[string]string{"tier": "backend"}}, + }, + nads: []*testNAD{ + { + Name: "udn-a", + Namespace: "frontend-a", + Network: "frontend-a-udn", + IsUDN: true, + IsPrimary: true, + Topology: types.Layer3Topology, + Subnet: "10.20.0.0/16", + NetworkID: "20", + }, + { + Name: "udn-b", + Namespace: "frontend-b", + Network: "frontend-b-udn", + IsUDN: true, + IsPrimary: true, + Topology: types.Layer3Topology, + Subnet: "10.21.0.0/16", + NetworkID: "21", + }, + }, + reconcile: "test-cnc", + expectSelectedNADs: []string{"frontend-a/udn-a", "frontend-b/udn-b"}, + expectSelectedNetworks: []string{"layer3_20", "layer3_21"}, + expectTunnelIDAllocated: true, + expectSubnetsAllocated: true, + expectCacheEntryExists: true, + }, + { + name: "skips namespace with default network (no primary UDN)", + cnc: &testCNC{ + Name: "test-cnc", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"selected": "true"}, + }, + }, + }, + }, + }, + namespaces: []*testNamespace{ + {Name: "ns-with-udn", Labels: map[string]string{"selected": "true"}}, + {Name: "ns-default", Labels: map[string]string{"selected": "true"}}, // no primary UDN configured + }, + nads: []*testNAD{ + { + Name: "primary-udn", + Namespace: "ns-with-udn", + Network: "ns-with-udn-network", + IsUDN: true, + IsPrimary: true, + Topology: types.Layer3Topology, + Subnet: "10.30.0.0/16", + NetworkID: "30", + }, + }, + reconcile: "test-cnc", + 
expectSelectedNADs: []string{"ns-with-udn/primary-udn"}, + expectSelectedNetworks: []string{"layer3_30"}, + expectTunnelIDAllocated: true, + expectSubnetsAllocated: true, + expectCacheEntryExists: true, + }, + // Combo selector tests + { + name: "selects both CUDN and primary UDN with combo selector", + cnc: &testCNC{ + Name: "test-cnc", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"type": "cudn"}, + }, + }, + }, + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"type": "udn"}, + }, + }, + }, + }, + }, + namespaces: []*testNamespace{ + {Name: "cudn-ns", Labels: map[string]string{"type": "cudn"}}, + {Name: "udn-ns", Labels: map[string]string{"type": "udn"}}, + }, + nads: []*testNAD{ + { + Name: "cudn-nad", + Namespace: "cudn-ns", + Network: util.GenerateCUDNNetworkName("my-cudn"), + IsCUDN: true, + IsPrimary: true, + Topology: types.Layer3Topology, + Subnet: "10.40.0.0/16", + Labels: map[string]string{"type": "cudn"}, + NetworkID: "40", + }, + { + Name: "udn-nad", + Namespace: "udn-ns", + Network: "udn-ns-network", + IsUDN: true, + IsPrimary: true, + Topology: types.Layer3Topology, + Subnet: "10.41.0.0/16", + NetworkID: "41", + }, + }, + reconcile: "test-cnc", + expectSelectedNADs: []string{"cudn-ns/cudn-nad", "udn-ns/udn-nad"}, + expectSelectedNetworks: []string{"layer3_40", "layer3_41"}, + expectTunnelIDAllocated: true, + expectSubnetsAllocated: true, + expectCacheEntryExists: true, + }, + // Graceful handling tests - these used to error but now skip gracefully + { + name: "skips namespace when UDN was deleted (InvalidPrimaryNetworkError) and continues reconciliation", + cnc: &testCNC{ + Name: "test-cnc", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"requires-udn": "true"}, + }, + }, + }, + }, + }, + namespaces: []*testNamespace{ + {Name: "pending-ns", Labels: map[string]string{"requires-udn": "true"}, RequiresUDN: true}, + }, + nads: []*testNAD{}, + reconcile: "test-cnc", + // No error - we gracefully skip namespaces with deleted UDNs so subnet release can proceed + wantErr: false, + expectSelectedNADs: []string{}, + expectSelectedNetworks: []string{}, + expectTunnelIDAllocated: true, + expectSubnetsAllocated: false, // no subnets allocated since no networks matched + expectCacheEntryExists: true, + }, + // Error condition tests + { + name: "errors when more than 1 primary NAD is found for namespace", + cnc: &testCNC{ + Name: "test-cnc", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"multi-nad": "true"}, + }, + }, + }, + }, + }, + namespaces: []*testNamespace{ + {Name: "multi-nad-ns", Labels: map[string]string{"multi-nad": "true"}}, + }, + nads: []*testNAD{ + { + Name: "primary-udn-1", + Namespace: "multi-nad-ns", + Network: "multi-nad-ns-network", + 
IsUDN: true, + IsPrimary: true, + Topology: types.Layer3Topology, + Subnet: "10.50.0.0/16", + NetworkID: "50", + }, + { + Name: "primary-udn-2", + Namespace: "multi-nad-ns", + Network: "multi-nad-ns-network-2", + IsUDN: true, + IsPrimary: true, + Topology: types.Layer3Topology, + Subnet: "10.51.0.0/16", + NetworkID: "51", + }, + }, + // Multiple NADs with IsUDN=true for same namespace - auto-triggers error + reconcile: "test-cnc", + wantErr: true, + }, + { + name: "errors on unsupported network selection type", + cnc: &testCNC{ + Name: "test-cnc", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: "UnsupportedType", + }, + }, + }, + reconcile: "test-cnc", + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := gomega.NewWithT(t) + gMaxLength := format.MaxLength + format.MaxLength = 0 + defer func() { format.MaxLength = gMaxLength }() + + config.IPv4Mode = true + config.IPv6Mode = true + config.OVNKubernetesFeature.EnableMultiNetwork = true + config.OVNKubernetesFeature.EnableNetworkSegmentation = true + config.OVNKubernetesFeature.EnableNetworkConnect = true + + fakeClientset := util.GetOVNClientset().GetClusterManagerClientset() + ovntest.AddNetworkConnectApplyReactor(fakeClientset.NetworkConnectClient.(*networkconnectfake.Clientset)) + + // Create test CNC + if tt.cnc != nil { + _, err := fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Create( + context.Background(), tt.cnc.ClusterNetworkConnect(), metav1.CreateOptions{}) + g.Expect(err).ToNot(gomega.HaveOccurred()) + } + + for _, nad := range tt.nads { + _, err := fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(nad.Namespace).Create( + context.Background(), nad.NAD(), metav1.CreateOptions{}) + g.Expect(err).ToNot(gomega.HaveOccurred()) + } + + for _, ns := range tt.namespaces { + _, err := fakeClientset.KubeClient.CoreV1().Namespaces().Create( + context.Background(), ns.Namespace(), metav1.CreateOptions{}) + g.Expect(err).ToNot(gomega.HaveOccurred()) + } + + wf, err := factory.NewClusterManagerWatchFactory(fakeClientset) + g.Expect(err).ToNot(gomega.HaveOccurred()) + + err = wf.Start() + g.Expect(err).ToNot(gomega.HaveOccurred()) + defer wf.Shutdown() + + // Wait for informer caches to sync + syncCtx, syncCancel := context.WithTimeout(context.Background(), 5*time.Second) + defer syncCancel() + synced := cache.WaitForCacheSync( + syncCtx.Done(), + wf.NADInformer().Informer().HasSynced, + wf.ClusterNetworkConnectInformer().Informer().HasSynced, + wf.NamespaceInformer().Informer().HasSynced, + ) + g.Expect(synced).To(gomega.BeTrue(), "informer caches should sync") + + // Create fake network manager and auto-configure from nads and namespaces + fakeNM := &networkmanager.FakeNetworkManager{ + PrimaryNetworks: make(map[string]util.NetInfo), + } + + // Auto-populate PrimaryNetworks from NADs with IsUDN=true and IsPrimary=true + // Group NADs by namespace for the FakeNetworkManager + nadsByNamespace := make(map[string][]*testNAD) + for _, nad := range tt.nads { + if nad.IsUDN && nad.IsPrimary { + nadsByNamespace[nad.Namespace] = append(nadsByNamespace[nad.Namespace], nad) + } + } + for namespace, nads := range nadsByNamespace { + // Use the first NAD to create the NetInfo + firstNAD := nads[0] + nadKey := fmt.Sprintf("%s/%s", firstNAD.Namespace, firstNAD.Name) + nad, err := wf.NADInformer().Lister().NetworkAttachmentDefinitions(namespace).Get(firstNAD.Name) + g.Expect(err).ToNot(gomega.HaveOccurred(), "NAD %s should 
exist", nadKey) + netInfo, err := util.ParseNADInfo(nad) + g.Expect(err).ToNot(gomega.HaveOccurred(), "NAD %s should be parseable", nadKey) + mutableNetInfo := util.NewMutableNetInfo(netInfo) + // Add all NAD keys to the NetInfo + for _, n := range nads { + mutableNetInfo.AddNADs(fmt.Sprintf("%s/%s", n.Namespace, n.Name)) + } + fakeNM.PrimaryNetworks[namespace] = mutableNetInfo + } + + // Auto-configure UDN namespaces from namespaces with RequiresUDN=true + for _, ns := range tt.namespaces { + if ns.RequiresUDN { + if fakeNM.UDNNamespaces == nil { + fakeNM.UDNNamespaces = sets.New[string]() + } + fakeNM.UDNNamespaces.Insert(ns.Name) + } + } + + tunnelKeysAllocator := id.NewTunnelKeyAllocator("TunnelKeys") + + c := NewController(wf, fakeClientset, fakeNM.Interface(), tunnelKeysAllocator) + + // Pre-populate cache for deletion test + if tt.expectCacheEntryDeleted { + c.cncCache[tt.reconcile] = &clusterNetworkConnectState{ + name: tt.reconcile, + selectedNADs: sets.New[string](), + selectedNetworks: sets.New[string](), + tunnelID: 12345, + } + } + + // Run reconciliation + err = c.reconcileClusterNetworkConnect(tt.reconcile) + if tt.wantErr { + g.Expect(err).To(gomega.HaveOccurred()) + return + } + g.Expect(err).ToNot(gomega.HaveOccurred()) + + // Verify cache state + if tt.expectCacheEntryDeleted { + _, exists := c.cncCache[tt.reconcile] + g.Expect(exists).To(gomega.BeFalse(), "cache entry should be deleted") + return + } + + if tt.expectCacheEntryExists { + cncState, exists := c.cncCache[tt.reconcile] + g.Expect(exists).To(gomega.BeTrue(), "cache entry should exist") + + // Verify selected NADs + g.Expect(cncState.selectedNADs.UnsortedList()).To(gomega.ConsistOf(tt.expectSelectedNADs)) + + // Verify selected networks + g.Expect(cncState.selectedNetworks.UnsortedList()).To(gomega.ConsistOf(tt.expectSelectedNetworks)) + + // Fetch the updated CNC for annotation verification + updatedCNC, err := fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Get( + context.Background(), tt.reconcile, metav1.GetOptions{}) + g.Expect(err).ToNot(gomega.HaveOccurred()) + + // Verify tunnel ID allocation and annotation + if tt.expectTunnelIDAllocated && len(tt.expectSelectedNADs) > 0 { + g.Expect(cncState.tunnelID).ToNot(gomega.BeZero(), "tunnel ID should be allocated") + tunnelKeyAnnotation, ok := updatedCNC.Annotations[util.OvnConnectRouterTunnelKeyAnnotation] + g.Expect(ok).To(gomega.BeTrue(), "tunnel key annotation should exist") + g.Expect(tunnelKeyAnnotation).To(gomega.Equal(fmt.Sprintf("%d", cncState.tunnelID)), + "tunnel key annotation should match cache") + } + + // Verify subnet allocation annotation + if tt.expectSubnetsAllocated && len(tt.expectSelectedNetworks) > 0 { + subnetAnnotation, ok := updatedCNC.Annotations["k8s.ovn.org/network-connect-subnet"] + g.Expect(ok).To(gomega.BeTrue(), "subnet annotation should exist") + var subnetsMap map[string]util.NetworkConnectSubnetAnnotation + err = json.Unmarshal([]byte(subnetAnnotation), &subnetsMap) + g.Expect(err).ToNot(gomega.HaveOccurred(), "subnet annotation should be valid JSON") + // Verify that the number of subnet entries matches the expected selected networks + // The annotation uses owner keys (e.g., "layer3_1", "layer2_2") not network names + g.Expect(subnetsMap).To(gomega.HaveLen(len(tt.expectSelectedNetworks)), + "number of subnet entries should match expected networks") + // Verify each entry has both IPv4 and IPv6 subnets (since both modes are enabled) + for owner, subnetEntry := range subnetsMap { + 
g.Expect(subnetEntry.IPv4).ToNot(gomega.BeEmpty(), + "subnet entry %s should have IPv4 address", owner) + g.Expect(subnetEntry.IPv6).ToNot(gomega.BeEmpty(), + "subnet entry %s should have IPv6 address", owner) + } + } + } + }) + } +} + +func TestCNCNeedsUpdate(t *testing.T) { + tests := []struct { + name string + oldObj *networkconnectv1.ClusterNetworkConnect + newObj *networkconnectv1.ClusterNetworkConnect + wantUpdate bool + }{ + { + name: "CNC is being created", + oldObj: nil, + newObj: &networkconnectv1.ClusterNetworkConnect{}, + wantUpdate: true, + }, + { + name: "CNC is being deleted", + oldObj: &networkconnectv1.ClusterNetworkConnect{}, + newObj: nil, + wantUpdate: true, + }, + { + name: "NetworkSelectors changed", + oldObj: &networkconnectv1.ClusterNetworkConnect{ + Spec: networkconnectv1.ClusterNetworkConnectSpec{ + NetworkSelectors: []apitypes.NetworkSelector{ + {NetworkSelectionType: apitypes.ClusterUserDefinedNetworks}, + }, + }, + }, + newObj: &networkconnectv1.ClusterNetworkConnect{ + Spec: networkconnectv1.ClusterNetworkConnectSpec{ + NetworkSelectors: []apitypes.NetworkSelector{ + {NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks}, + }, + }, + }, + wantUpdate: true, + }, + { + name: "NetworkSelectors unchanged", + oldObj: &networkconnectv1.ClusterNetworkConnect{ + Spec: networkconnectv1.ClusterNetworkConnectSpec{ + NetworkSelectors: []apitypes.NetworkSelector{ + {NetworkSelectionType: apitypes.ClusterUserDefinedNetworks}, + }, + }, + }, + newObj: &networkconnectv1.ClusterNetworkConnect{ + Spec: networkconnectv1.ClusterNetworkConnectSpec{ + NetworkSelectors: []apitypes.NetworkSelector{ + {NetworkSelectionType: apitypes.ClusterUserDefinedNetworks}, + }, + }, + }, + wantUpdate: false, + }, + { + name: "Connectivity changed (should not trigger update)", + oldObj: &networkconnectv1.ClusterNetworkConnect{ + Spec: networkconnectv1.ClusterNetworkConnectSpec{ + Connectivity: []networkconnectv1.ConnectivityType{networkconnectv1.PodNetwork}, + }, + }, + newObj: &networkconnectv1.ClusterNetworkConnect{ + Spec: networkconnectv1.ClusterNetworkConnectSpec{ + Connectivity: []networkconnectv1.ConnectivityType{networkconnectv1.ClusterIPServiceNetwork}, + }, + }, + wantUpdate: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := gomega.NewWithT(t) + result := cncNeedsUpdate(tt.oldObj, tt.newObj) + g.Expect(result).To(gomega.Equal(tt.wantUpdate)) + }) + } +} + +func TestController_reconcileNAD(t *testing.T) { + tests := []struct { + name string + cncs []*testCNC + nads []*testNAD + prePopulateCache map[string]*clusterNetworkConnectState + reconcileNAD string + expectCNCReconciled []string + expectNoReconcile bool + }{ + { + name: "NAD creation triggers CNC reconciliation if it matches CNC selector", + cncs: []*testCNC{ + { + Name: "cnc1", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"selected": "true"}, + }, + }, + }, + }, + }, + }, + nads: []*testNAD{ + { + Name: "cudn-test", + Namespace: "test", + Network: util.GenerateCUDNNetworkName("test"), + IsCUDN: true, + IsPrimary: true, + Topology: types.Layer3Topology, + Labels: map[string]string{"selected": "true"}, + NetworkID: "1", + }, + }, + prePopulateCache: map[string]*clusterNetworkConnectState{ + "cnc1": { + name: "cnc1", + selectedNADs: sets.New[string](), + 
selectedNetworks: sets.New[string](), + }, + }, + reconcileNAD: "test/cudn-test", + expectCNCReconciled: []string{"cnc1"}, + }, + { + name: "NAD matching only one of two CNCs triggers reconciliation for only that CNC", + cncs: []*testCNC{ + { + Name: "cnc1", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"selected": "true"}, + }, + }, + }, + }, + }, + { + Name: "cnc2", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"other": "label"}, + }, + }, + }, + }, + }, + }, + nads: []*testNAD{ + { + Name: "cudn-test", + Namespace: "test", + Network: util.GenerateCUDNNetworkName("test"), + IsCUDN: true, + IsPrimary: true, + Topology: types.Layer3Topology, + Labels: map[string]string{"selected": "true"}, // matches cnc1, not cnc2 + NetworkID: "1", + }, + }, + prePopulateCache: map[string]*clusterNetworkConnectState{ + "cnc1": { + name: "cnc1", + selectedNADs: sets.New[string](), + selectedNetworks: sets.New[string](), + }, + "cnc2": { + name: "cnc2", + selectedNADs: sets.New[string](), + selectedNetworks: sets.New[string](), + }, + }, + reconcileNAD: "test/cudn-test", + expectCNCReconciled: []string{"cnc1"}, + }, + { + name: "NAD not matching (and previously not matching) any CNC selector does not trigger reconciliation", + cncs: []*testCNC{ + { + Name: "cnc1", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"selected": "true"}, + }, + }, + }, + }, + }, + }, + nads: []*testNAD{ + { + Name: "cudn-test", + Namespace: "test", + Network: util.GenerateCUDNNetworkName("test"), + IsCUDN: true, + IsPrimary: true, + Topology: types.Layer3Topology, + Labels: map[string]string{"selected": "false"}, // doesn't match + NetworkID: "1", + }, + }, + prePopulateCache: map[string]*clusterNetworkConnectState{ + "cnc1": { + name: "cnc1", + selectedNADs: sets.New[string](), + selectedNetworks: sets.New[string](), + }, + }, + reconcileNAD: "test/cudn-test", + expectNoReconcile: true, + }, + { + name: "NAD deletion triggers CNC reconciliation if it was previously selected", + cncs: []*testCNC{ + { + Name: "cnc1", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"selected": "true"}, + }, + }, + }, + }, + }, + }, + nads: []*testNAD{}, // NAD is deleted + prePopulateCache: map[string]*clusterNetworkConnectState{ + "cnc1": { + name: "cnc1", + selectedNADs: sets.New("test/cudn-test"), // was selected before + selectedNetworks: sets.New("layer3_1"), + }, + }, + reconcileNAD: "test/cudn-test", + expectCNCReconciled: []string{"cnc1"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := gomega.NewWithT(t) + gMaxLength := format.MaxLength + format.MaxLength = 0 + defer func() { format.MaxLength = gMaxLength }() + + config.IPv4Mode = 
true + config.IPv6Mode = false + config.OVNKubernetesFeature.EnableMultiNetwork = true + config.OVNKubernetesFeature.EnableNetworkSegmentation = true + config.OVNKubernetesFeature.EnableNetworkConnect = true + + fakeClientset := util.GetOVNClientset().GetClusterManagerClientset() + ovntest.AddNetworkConnectApplyReactor(fakeClientset.NetworkConnectClient.(*networkconnectfake.Clientset)) + + // Create test CNCs + for _, cnc := range tt.cncs { + _, err := fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Create( + context.Background(), cnc.ClusterNetworkConnect(), metav1.CreateOptions{}) + g.Expect(err).ToNot(gomega.HaveOccurred()) + } + + // Create test NADs + for _, nad := range tt.nads { + _, err := fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(nad.Namespace).Create( + context.Background(), nad.NAD(), metav1.CreateOptions{}) + g.Expect(err).ToNot(gomega.HaveOccurred()) + } + + wf, err := factory.NewClusterManagerWatchFactory(fakeClientset) + g.Expect(err).ToNot(gomega.HaveOccurred()) + + err = wf.Start() + g.Expect(err).ToNot(gomega.HaveOccurred()) + defer wf.Shutdown() + + // Wait for informer caches to sync + syncCtx, syncCancel := context.WithTimeout(context.Background(), 5*time.Second) + defer syncCancel() + synced := cache.WaitForCacheSync( + syncCtx.Done(), + wf.NADInformer().Informer().HasSynced, + wf.ClusterNetworkConnectInformer().Informer().HasSynced, + ) + g.Expect(synced).To(gomega.BeTrue(), "informer caches should sync") + + fakeNM := &networkmanager.FakeNetworkManager{ + PrimaryNetworks: make(map[string]util.NetInfo), + } + + tunnelKeysAllocator := id.NewTunnelKeyAllocator("TunnelKeys") + c := NewController(wf, fakeClientset, fakeNM.Interface(), tunnelKeysAllocator) + + // Pre-populate cache + for name, state := range tt.prePopulateCache { + c.cncCache[name] = state + } + + // Track reconciled CNCs using a set to handle duplicate reconciles + reconciledCNCs := sets.New[string]() + reconciledMutex := sync.Mutex{} + + // Replace the CNC controller with a mock that tracks reconciliations + cncCfg := &controllerutil.ControllerConfig[networkconnectv1.ClusterNetworkConnect]{ + RateLimiter: workqueue.DefaultTypedControllerRateLimiter[string](), + Informer: wf.ClusterNetworkConnectInformer().Informer(), + Lister: wf.ClusterNetworkConnectInformer().Lister().List, + Reconcile: func(key string) error { + reconciledMutex.Lock() + defer reconciledMutex.Unlock() + reconciledCNCs.Insert(key) + return nil + }, + ObjNeedsUpdate: cncNeedsUpdate, + Threadiness: 1, + } + c.cncController = controllerutil.NewController( + "test-cnc-controller", + cncCfg, + ) + + err = controllerutil.Start(c.cncController) + g.Expect(err).ToNot(gomega.HaveOccurred()) + defer controllerutil.Stop(c.cncController) + + // Wait for initial controller sync to reconcile all CNCs, + // then clear the recorded reconciliations. + // Post this its correct to check if reconcileNAD added it + // back or didn't i.e if we called reconcileClusterNetworkConnect + // or not. 
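+ // Note: controllerutil.Start drives the informer's initial Add events for
+ // the pre-created CNCs through the mock Reconcile above, which is why the
+ // Eventually check below expects at least len(tt.cncs) recorded keys.
+ // Draining reconciledCNCs afterwards isolates the reconciles that can only
+ // have been triggered by the explicit reconcileNAD call under test.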
+ g.Eventually(func() int { + reconciledMutex.Lock() + defer reconciledMutex.Unlock() + return reconciledCNCs.Len() + }).Should(gomega.BeNumerically(">=", len(tt.cncs))) + reconciledMutex.Lock() + reconciledCNCs = sets.New[string]() + reconciledMutex.Unlock() + + // Run NAD reconciliation + err = c.reconcileNAD(tt.reconcileNAD) + g.Expect(err).ToNot(gomega.HaveOccurred()) + + // Allow time for async reconciliation + if tt.expectNoReconcile { + g.Consistently(func() []string { + reconciledMutex.Lock() + defer reconciledMutex.Unlock() + return reconciledCNCs.UnsortedList() + }).Should(gomega.BeEmpty()) + } else { + g.Eventually(func() []string { + reconciledMutex.Lock() + defer reconciledMutex.Unlock() + return reconciledCNCs.UnsortedList() + }).Should(gomega.ConsistOf(tt.expectCNCReconciled)) + } + }) + } +} + +func TestNADNeedsUpdate(t *testing.T) { + cudnOwner := makeCUDNOwnerRef("test-cudn") + udnOwner := makeUDNOwnerRef("test-udn") + + makePrimaryNADConfig := func(name string) string { + return fmt.Sprintf(`{"cniVersion": "0.4.0", "name": "%s", "type": "ovn-k8s-cni-overlay", "topology": "layer3", "role": "primary", "netAttachDefName": "test/%s"}`, name, name) + } + + makeSecondaryNADConfig := func(name string) string { + return fmt.Sprintf(`{"cniVersion": "0.4.0", "name": "%s", "type": "ovn-k8s-cni-overlay", "topology": "layer3", "netAttachDefName": "test/%s"}`, name, name) + } + + tests := []struct { + name string + oldObj *nadv1.NetworkAttachmentDefinition + newObj *nadv1.NetworkAttachmentDefinition + wantUpdate bool + }{ + { + name: "NAD without owner is ignored", + oldObj: nil, + newObj: &nadv1.NetworkAttachmentDefinition{ + ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "test"}, + Spec: nadv1.NetworkAttachmentDefinitionSpec{Config: makePrimaryNADConfig("test")}, + }, + wantUpdate: false, + }, + { + name: "CUDN NAD being created", + oldObj: nil, + newObj: &nadv1.NetworkAttachmentDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "test", + OwnerReferences: []metav1.OwnerReference{cudnOwner}, + }, + Spec: nadv1.NetworkAttachmentDefinitionSpec{Config: makePrimaryNADConfig("test")}, + }, + wantUpdate: true, + }, + { + name: "CUDN NAD being deleted", + oldObj: &nadv1.NetworkAttachmentDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "test", + OwnerReferences: []metav1.OwnerReference{cudnOwner}, + }, + Spec: nadv1.NetworkAttachmentDefinitionSpec{Config: makePrimaryNADConfig("test")}, + }, + newObj: nil, + wantUpdate: true, + }, + { + name: "UDN NAD being created", + oldObj: nil, + newObj: &nadv1.NetworkAttachmentDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "test", + OwnerReferences: []metav1.OwnerReference{udnOwner}, + }, + Spec: nadv1.NetworkAttachmentDefinitionSpec{Config: makePrimaryNADConfig("test")}, + }, + wantUpdate: true, + }, + { + name: "UDN NAD being deleted", + oldObj: &nadv1.NetworkAttachmentDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "test", + OwnerReferences: []metav1.OwnerReference{udnOwner}, + }, + Spec: nadv1.NetworkAttachmentDefinitionSpec{Config: makePrimaryNADConfig("test")}, + }, + newObj: nil, + wantUpdate: true, + }, + { + name: "NAD labels changed", + oldObj: &nadv1.NetworkAttachmentDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "test", + OwnerReferences: []metav1.OwnerReference{cudnOwner}, + Labels: map[string]string{"old": "label"}, + }, + Spec: nadv1.NetworkAttachmentDefinitionSpec{Config: makePrimaryNADConfig("test")}, + 
}, + newObj: &nadv1.NetworkAttachmentDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "test", + OwnerReferences: []metav1.OwnerReference{cudnOwner}, + Labels: map[string]string{"new": "label"}, + }, + Spec: nadv1.NetworkAttachmentDefinitionSpec{Config: makePrimaryNADConfig("test")}, + }, + wantUpdate: true, + }, + { + name: "NAD network ID annotation changed", + oldObj: &nadv1.NetworkAttachmentDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "test", + OwnerReferences: []metav1.OwnerReference{cudnOwner}, + Annotations: map[string]string{types.OvnNetworkIDAnnotation: "1"}, + }, + Spec: nadv1.NetworkAttachmentDefinitionSpec{Config: makePrimaryNADConfig("test")}, + }, + newObj: &nadv1.NetworkAttachmentDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "test", + OwnerReferences: []metav1.OwnerReference{cudnOwner}, + Annotations: map[string]string{types.OvnNetworkIDAnnotation: "2"}, + }, + Spec: nadv1.NetworkAttachmentDefinitionSpec{Config: makePrimaryNADConfig("test")}, + }, + wantUpdate: true, + }, + { + name: "NAD unchanged", + oldObj: &nadv1.NetworkAttachmentDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "test", + OwnerReferences: []metav1.OwnerReference{cudnOwner}, + Labels: map[string]string{"same": "label"}, + Annotations: map[string]string{types.OvnNetworkIDAnnotation: "1"}, + }, + Spec: nadv1.NetworkAttachmentDefinitionSpec{Config: makePrimaryNADConfig("test")}, + }, + newObj: &nadv1.NetworkAttachmentDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "test", + OwnerReferences: []metav1.OwnerReference{cudnOwner}, + Labels: map[string]string{"same": "label"}, + Annotations: map[string]string{types.OvnNetworkIDAnnotation: "1"}, + }, + Spec: nadv1.NetworkAttachmentDefinitionSpec{Config: makePrimaryNADConfig("test")}, + }, + wantUpdate: false, + }, + { + name: "secondary NAD is ignored", + oldObj: nil, + newObj: &nadv1.NetworkAttachmentDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "test", + OwnerReferences: []metav1.OwnerReference{cudnOwner}, + }, + Spec: nadv1.NetworkAttachmentDefinitionSpec{Config: makeSecondaryNADConfig("test")}, + }, + wantUpdate: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := gomega.NewWithT(t) + result := nadNeedsUpdate(tt.oldObj, tt.newObj) + g.Expect(result).To(gomega.Equal(tt.wantUpdate)) + }) + } +} + +func TestMustProcessCNCForNAD(t *testing.T) { + tests := []struct { + // name is the test case name + name string + // cnc is the ClusterNetworkConnect to test against + cnc *testCNC + // nad is the NAD being processed that may or may not match the CNC selector + nad *testNAD + // namespaces is the list of namespaces to create (needed for PrimaryUserDefinedNetworks selector tests) + namespaces []testNamespace + // cncCacheState is the pre-existing CNC cache state (nil means CNC not in cache) + cncCacheState *clusterNetworkConnectState + // mustProcessCNC is the expected result of mustProcessCNCForNAD + mustProcessCNC bool + }{ + { + name: "Primary CUDN owned NAD starts matching but CNC cache doesn't exist (CNC not created yet) - should NOT process CNC", + cnc: &testCNC{ + Name: "cnc1", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"selected": "true"}, + }, 
+ }, + }, + }, + }, + nad: &testNAD{ + Name: "cudn-test", + Namespace: "test", + Network: util.GenerateCUDNNetworkName("test"), + IsCUDN: true, + IsPrimary: true, + Labels: map[string]string{"selected": "true"}, + }, + cncCacheState: nil, // CNC not in cache yet which means cncCreate has not happened yet + mustProcessCNC: false, + }, + { + name: "Primary CUDN owned NAD starts matching - should process CNC", + cnc: &testCNC{ + Name: "cnc1", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"selected": "true"}, + }, + }, + }, + }, + }, + nad: &testNAD{ + Name: "cudn-test", + Namespace: "test", + Network: util.GenerateCUDNNetworkName("test"), + IsCUDN: true, + IsPrimary: true, + Labels: map[string]string{"selected": "true"}, + }, + cncCacheState: &clusterNetworkConnectState{ + name: "cnc1", + selectedNADs: sets.New[string](), // empty - NAD not selected before + selectedNetworks: sets.New[string](), + }, + mustProcessCNC: true, + }, + { + name: "Primary CUDN owned NAD stops matching - should process CNC", + cnc: &testCNC{ + Name: "cnc1", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"selected": "true"}, + }, + }, + }, + }, + }, + nad: &testNAD{ + Name: "cudn-test", + Namespace: "test", + Network: util.GenerateCUDNNetworkName("test"), + IsCUDN: true, + IsPrimary: true, + Labels: map[string]string{"selected": "false"}, // no longer matches + }, + cncCacheState: &clusterNetworkConnectState{ + name: "cnc1", + selectedNADs: sets.New("test/cudn-test"), // was selected before + selectedNetworks: sets.New("layer3_1"), + }, + mustProcessCNC: true, + }, + { + name: "Primary CUDN owned NAD still matches - should process CNC", + cnc: &testCNC{ + Name: "cnc1", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"selected": "true"}, + }, + }, + }, + }, + }, + nad: &testNAD{ + Name: "cudn-test", + Namespace: "test", + Network: util.GenerateCUDNNetworkName("test"), + IsCUDN: true, + IsPrimary: true, + Labels: map[string]string{"selected": "true"}, + }, + cncCacheState: &clusterNetworkConnectState{ + name: "cnc1", + selectedNADs: sets.New("test/cudn-test"), // already selected + selectedNetworks: sets.New("layer3_1"), + }, + mustProcessCNC: true, + }, + { + name: "Primary CUDN owned NAD still doesn't match - should NOT process CNC", + cnc: &testCNC{ + Name: "cnc1", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"selected": "true"}, + }, + }, + }, + }, + }, + nad: &testNAD{ + Name: "cudn-test", + Namespace: "test", + Network: util.GenerateCUDNNetworkName("test"), + IsCUDN: true, + IsPrimary: true, + Labels: map[string]string{"selected": "false"}, + }, + cncCacheState: &clusterNetworkConnectState{ + name: "cnc1", + selectedNADs: 
sets.New[string](), // not selected before either + selectedNetworks: sets.New[string](), + }, + mustProcessCNC: false, + }, + // PrimaryUserDefinedNetworks selector tests + { + name: "Primary UDN owned NAD starts matching but CNC cache doesn't exist - should NOT process CNC", + cnc: &testCNC{ + Name: "cnc1", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"selected": "true"}, + }, + }, + }, + }, + }, + nad: &testNAD{ + Name: "udn-test", + Namespace: "test", + Network: util.GenerateUDNNetworkName("test", "udn-test"), + IsUDN: true, + IsPrimary: true, + }, + namespaces: []testNamespace{ + {Name: "test", Labels: map[string]string{"selected": "true"}}, + }, + cncCacheState: nil, // CNC not in cache yet + mustProcessCNC: false, + }, + { + name: "Primary UDN owned NAD starts matching - should process CNC", + cnc: &testCNC{ + Name: "cnc1", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"selected": "true"}, + }, + }, + }, + }, + }, + nad: &testNAD{ + Name: "udn-test", + Namespace: "test", + Network: util.GenerateUDNNetworkName("test", "udn-test"), + IsUDN: true, + IsPrimary: true, + }, + namespaces: []testNamespace{ + {Name: "test", Labels: map[string]string{"selected": "true"}}, + }, + cncCacheState: &clusterNetworkConnectState{ + name: "cnc1", + selectedNADs: sets.New[string](), // empty - NAD not selected before + selectedNetworks: sets.New[string](), + }, + mustProcessCNC: true, + }, + { + name: "Primary UDN owned NAD stops matching (namespace labels changed) - should process CNC", + cnc: &testCNC{ + Name: "cnc1", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"selected": "true"}, + }, + }, + }, + }, + }, + nad: &testNAD{ + Name: "udn-test", + Namespace: "test", + Network: util.GenerateUDNNetworkName("test", "udn-test"), + IsUDN: true, + IsPrimary: true, + }, + namespaces: []testNamespace{ + {Name: "test", Labels: map[string]string{"selected": "false"}}, // namespace no longer matches + }, + cncCacheState: &clusterNetworkConnectState{ + name: "cnc1", + selectedNADs: sets.New("test/udn-test"), // was selected before + selectedNetworks: sets.New("layer3_1"), + }, + mustProcessCNC: true, + }, + { + name: "Primary UDN owned NAD still matches - should process CNC", + cnc: &testCNC{ + Name: "cnc1", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"selected": "true"}, + }, + }, + }, + }, + }, + nad: &testNAD{ + Name: "udn-test", + Namespace: "test", + Network: util.GenerateUDNNetworkName("test", "udn-test"), + IsUDN: true, + IsPrimary: true, + }, + namespaces: []testNamespace{ + {Name: "test", Labels: map[string]string{"selected": "true"}}, + }, + cncCacheState: &clusterNetworkConnectState{ + name: "cnc1", + selectedNADs: 
sets.New("test/udn-test"), // already selected + selectedNetworks: sets.New("layer3_1"), + }, + mustProcessCNC: true, + }, + { + name: "Primary UDN owned NAD still doesn't match - should NOT process CNC", + cnc: &testCNC{ + Name: "cnc1", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"selected": "true"}, + }, + }, + }, + }, + }, + nad: &testNAD{ + Name: "udn-test", + Namespace: "test", + Network: util.GenerateUDNNetworkName("test", "udn-test"), + IsUDN: true, + IsPrimary: true, + }, + namespaces: []testNamespace{ + {Name: "test", Labels: map[string]string{"selected": "false"}}, // namespace doesn't match + }, + cncCacheState: &clusterNetworkConnectState{ + name: "cnc1", + selectedNADs: sets.New[string](), // not selected before either + selectedNetworks: sets.New[string](), + }, + mustProcessCNC: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := gomega.NewWithT(t) + + config.IPv4Mode = true + config.IPv6Mode = false + config.OVNKubernetesFeature.EnableMultiNetwork = true + config.OVNKubernetesFeature.EnableNetworkSegmentation = true + config.OVNKubernetesFeature.EnableNetworkConnect = true + + fakeClientset := util.GetOVNClientset().GetClusterManagerClientset() + + // Create namespaces if provided (for PrimaryUserDefinedNetworks tests) + for _, ns := range tt.namespaces { + _, err := fakeClientset.KubeClient.CoreV1().Namespaces().Create( + context.Background(), + ns.Namespace(), + metav1.CreateOptions{}, + ) + g.Expect(err).ToNot(gomega.HaveOccurred()) + } + + wf, err := factory.NewClusterManagerWatchFactory(fakeClientset) + g.Expect(err).ToNot(gomega.HaveOccurred()) + + err = wf.Start() + g.Expect(err).ToNot(gomega.HaveOccurred()) + defer wf.Shutdown() + + fakeNM := &networkmanager.FakeNetworkManager{ + PrimaryNetworks: make(map[string]util.NetInfo), + } + + // Auto-configure primary network from NAD when IsUDN && IsPrimary + if tt.nad != nil && tt.nad.IsUDN && tt.nad.IsPrimary { + netInfo, err := util.NewNetInfo(&ovncnitypes.NetConf{ + NetConf: cnitypes.NetConf{Name: tt.nad.Network}, + Topology: types.Layer3Topology, + Role: types.NetworkRolePrimary, + }) + g.Expect(err).ToNot(gomega.HaveOccurred()) + mutableNetInfo := util.NewMutableNetInfo(netInfo) + mutableNetInfo.SetNADs(tt.nad.Namespace + "/" + tt.nad.Name) + fakeNM.PrimaryNetworks[tt.nad.Namespace] = mutableNetInfo + } + + tunnelKeysAllocator := id.NewTunnelKeyAllocator("TunnelKeys") + c := NewController(wf, fakeClientset, fakeNM.Interface(), tunnelKeysAllocator) + + // Pre-populate cache if provided + if tt.cncCacheState != nil { + c.cncCache[tt.cnc.Name] = tt.cncCacheState + } + + var nad *nadv1.NetworkAttachmentDefinition + if tt.nad != nil { + nad = tt.nad.NAD() + } + + cnc := tt.cnc.ClusterNetworkConnect() + nadKey := "" + if tt.nad != nil { + nadKey = tt.nad.Namespace + "/" + tt.nad.Name + } + + result := c.mustProcessCNCForNAD(nad, cnc, nadKey) + g.Expect(result).To(gomega.Equal(tt.mustProcessCNC)) + }) + } +} + +func TestNamespaceNeedsUpdate(t *testing.T) { + tests := []struct { + name string + oldObj *corev1.Namespace + newObj *corev1.Namespace + wantUpdate bool + }{ + { + name: "namespace is being created (oldObj nil)", + oldObj: nil, + newObj: &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "test"}}, + wantUpdate: false, + }, + { + name: "namespace is 
being deleted (newObj nil)", + oldObj: &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "test"}}, + newObj: nil, + wantUpdate: false, + }, + { + name: "namespace without UDN label - labels changed", + oldObj: &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Labels: map[string]string{"old": "label"}, + }, + }, + newObj: &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Labels: map[string]string{"new": "label"}, + }, + }, + wantUpdate: false, // no UDN label, so we don't care + }, + { + name: "namespace with UDN label - labels changed", + oldObj: &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Labels: map[string]string{types.RequiredUDNNamespaceLabel: ""}, + }, + }, + newObj: &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Labels: map[string]string{types.RequiredUDNNamespaceLabel: "", "new": "label"}, + }, + }, + wantUpdate: true, + }, + { + name: "namespace with UDN label - labels unchanged", + oldObj: &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Labels: map[string]string{types.RequiredUDNNamespaceLabel: "", "same": "label"}, + }, + }, + newObj: &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Labels: map[string]string{types.RequiredUDNNamespaceLabel: "", "same": "label"}, + }, + }, + wantUpdate: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := gomega.NewWithT(t) + result := namespaceNeedsUpdate(tt.oldObj, tt.newObj) + g.Expect(result).To(gomega.Equal(tt.wantUpdate)) + }) + } +} + +func TestMustProcessCNCForNamespace(t *testing.T) { + tests := []struct { + name string + cnc *testCNC + namespace *testNamespace + primaryNAD string + cncCache map[string]*clusterNetworkConnectState + mustProcessCNC bool + }{ + { + name: "CNC cache doesn't exist - should NOT process", + cnc: &testCNC{ + Name: "cnc1", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"selected": "true"}, + }, + }, + }, + }, + }, + namespace: &testNamespace{Name: "test-ns", Labels: map[string]string{"selected": "true"}}, + primaryNAD: "test-ns/primary-udn", + cncCache: nil, + mustProcessCNC: false, + }, + { + name: "namespace starts matching CNC - should process", + cnc: &testCNC{ + Name: "cnc1", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"selected": "true"}, + }, + }, + }, + }, + }, + namespace: &testNamespace{Name: "test-ns", Labels: map[string]string{"selected": "true"}}, + primaryNAD: "test-ns/primary-udn", + cncCache: map[string]*clusterNetworkConnectState{ + "cnc1": { + name: "cnc1", + selectedNADs: sets.New[string](), + selectedNetworks: sets.New[string](), + }, + }, + mustProcessCNC: true, + }, + { + name: "namespace stops matching CNC - should process", + cnc: &testCNC{ + Name: "cnc1", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"selected": "true"}, + }, + }, + }, + }, + }, + 
namespace: &testNamespace{Name: "test-ns", Labels: map[string]string{"selected": "false"}}, + primaryNAD: "test-ns/primary-udn", + cncCache: map[string]*clusterNetworkConnectState{ + "cnc1": { + name: "cnc1", + selectedNADs: sets.New("test-ns/primary-udn"), // was previously selected + selectedNetworks: sets.New("layer3_1"), + }, + }, + mustProcessCNC: true, + }, + { + name: "namespace continues to match CNC - should NOT process", + cnc: &testCNC{ + Name: "cnc1", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"selected": "true"}, + }, + }, + }, + }, + }, + namespace: &testNamespace{Name: "test-ns", Labels: map[string]string{"selected": "true"}}, + primaryNAD: "test-ns/primary-udn", + cncCache: map[string]*clusterNetworkConnectState{ + "cnc1": { + name: "cnc1", + selectedNADs: sets.New("test-ns/primary-udn"), + selectedNetworks: sets.New("layer3_1"), + }, + }, + mustProcessCNC: false, // state unchanged + }, + { + name: "namespace continues to NOT match CNC - should NOT process", + cnc: &testCNC{ + Name: "cnc1", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"selected": "true"}, + }, + }, + }, + }, + }, + namespace: &testNamespace{Name: "test-ns", Labels: map[string]string{"selected": "false"}}, + primaryNAD: "test-ns/primary-udn", + cncCache: map[string]*clusterNetworkConnectState{ + "cnc1": { + name: "cnc1", + selectedNADs: sets.New[string](), + selectedNetworks: sets.New[string](), + }, + }, + mustProcessCNC: false, // state unchanged + }, + { + name: "CNC with CUDN selector ignores namespace changes", + cnc: &testCNC{ + Name: "cnc1", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"type": "cudn"}, + }, + }, + }, + }, + }, + namespace: &testNamespace{Name: "test-ns", Labels: map[string]string{"selected": "true"}}, + primaryNAD: "test-ns/primary-udn", + cncCache: map[string]*clusterNetworkConnectState{ + "cnc1": { + name: "cnc1", + selectedNADs: sets.New[string](), + selectedNetworks: sets.New[string](), + }, + }, + mustProcessCNC: false, // CUDN selector doesn't care about namespace labels + }, + // Multiple CNC tests + { + name: "multiple CNCs in cache - only checks the specific CNC", + cnc: &testCNC{ + Name: "cnc1", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"tier": "frontend"}, + }, + }, + }, + }, + }, + namespace: &testNamespace{Name: "frontend-ns", Labels: map[string]string{"tier": "frontend"}}, + primaryNAD: "frontend-ns/primary-udn", + cncCache: map[string]*clusterNetworkConnectState{ + "cnc1": { + name: "cnc1", + selectedNADs: sets.New[string](), // not selected before + selectedNetworks: sets.New[string](), + }, + "cnc2": { + name: "cnc2", + selectedNADs: sets.New("frontend-ns/primary-udn"), // cnc2 
already selected this NAD + selectedNetworks: sets.New("layer3_1"), + }, + }, + mustProcessCNC: true, // cnc1 state changed (started matching) + }, + { + name: "multiple CNCs - CNC not in cache while others exist", + cnc: &testCNC{ + Name: "cnc1", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"tier": "frontend"}, + }, + }, + }, + }, + }, + namespace: &testNamespace{Name: "frontend-ns", Labels: map[string]string{"tier": "frontend"}}, + primaryNAD: "frontend-ns/primary-udn", + cncCache: map[string]*clusterNetworkConnectState{ + // cnc1 not in cache, but cnc2 is + "cnc2": { + name: "cnc2", + selectedNADs: sets.New("frontend-ns/primary-udn"), + selectedNetworks: sets.New("layer3_1"), + }, + }, + mustProcessCNC: false, // cnc1 not in cache, so don't process + }, + // Multiple selectors tests - verify OR semantics across selectors + { + name: "multiple PUDN selectors - namespace matches first selector only (OR semantics)", + cnc: &testCNC{ + Name: "cnc1", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"tier": "frontend"}, // namespace matches THIS one + }, + }, + }, + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"tier": "backend"}, // namespace does NOT match this one + }, + }, + }, + }, + }, + namespace: &testNamespace{Name: "frontend-ns", Labels: map[string]string{"tier": "frontend"}}, + primaryNAD: "frontend-ns/primary-udn", + cncCache: map[string]*clusterNetworkConnectState{ + "cnc1": { + name: "cnc1", + selectedNADs: sets.New[string](), // not selected before + selectedNetworks: sets.New[string](), + }, + }, + mustProcessCNC: true, // should process because namespace matches FIRST selector (OR semantics) + }, + { + name: "multiple PUDN selectors - namespace matches second selector only (OR semantics)", + cnc: &testCNC{ + Name: "cnc1", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"tier": "frontend"}, // namespace does NOT match this one + }, + }, + }, + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"tier": "backend"}, // namespace matches THIS one + }, + }, + }, + }, + }, + namespace: &testNamespace{Name: "backend-ns", Labels: map[string]string{"tier": "backend"}}, + primaryNAD: "backend-ns/primary-udn", + cncCache: map[string]*clusterNetworkConnectState{ + "cnc1": { + name: "cnc1", + selectedNADs: sets.New[string](), // not selected before + selectedNetworks: sets.New[string](), + }, + }, + mustProcessCNC: true, // should process because namespace matches SECOND selector (OR semantics) + }, + { + name: "multiple PUDN selectors - namespace matches neither (no state change)", + cnc: 
&testCNC{ + Name: "cnc1", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"tier": "frontend"}, + }, + }, + }, + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"tier": "backend"}, + }, + }, + }, + }, + }, + namespace: &testNamespace{Name: "other-ns", Labels: map[string]string{"tier": "database"}}, + primaryNAD: "other-ns/primary-udn", + cncCache: map[string]*clusterNetworkConnectState{ + "cnc1": { + name: "cnc1", + selectedNADs: sets.New[string](), // not selected before + selectedNetworks: sets.New[string](), + }, + }, + mustProcessCNC: false, // no state change (was not selected, still not selected) + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := gomega.NewWithT(t) + + c := &Controller{ + cncCache: make(map[string]*clusterNetworkConnectState), + } + + // Populate cache with all entries + for name, state := range tt.cncCache { + c.cncCache[name] = state + } + + cnc := tt.cnc.ClusterNetworkConnect() + namespace := tt.namespace.Namespace() + + result := c.mustProcessCNCForNamespace(cnc, namespace, tt.primaryNAD) + g.Expect(result).To(gomega.Equal(tt.mustProcessCNC)) + }) + } +} + +func TestController_reconcileNamespace(t *testing.T) { + tests := []struct { + name string + cncs []*testCNC + nads []*testNAD + namespaces []*testNamespace + prePopulateCache map[string]*clusterNetworkConnectState + reconcileNamespace string + expectCNCReconciled []string + expectNoReconcile bool + }{ + { + name: "namespace label change triggers CNC reconciliation for matching Primary UDN selector", + cncs: []*testCNC{ + { + Name: "cnc1", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"tier": "frontend"}, + }, + }, + }, + }, + }, + }, + namespaces: []*testNamespace{ + {Name: "frontend-ns", Labels: map[string]string{"tier": "frontend"}}, + }, + nads: []*testNAD{ + { + Name: "primary-udn", + Namespace: "frontend-ns", + Network: "frontend-ns-network", + IsUDN: true, + IsPrimary: true, + Topology: types.Layer3Topology, + NetworkID: "1", + }, + }, + prePopulateCache: map[string]*clusterNetworkConnectState{ + "cnc1": { + name: "cnc1", + selectedNADs: sets.New[string](), // not selected before + selectedNetworks: sets.New[string](), + }, + }, + reconcileNamespace: "frontend-ns", + expectCNCReconciled: []string{"cnc1"}, + }, + { + name: "namespace with default network does not trigger CNC reconciliation", + cncs: []*testCNC{ + { + Name: "cnc1", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"tier": "frontend"}, + }, + }, + }, + }, + }, + }, + namespaces: []*testNamespace{ + {Name: "default-ns", Labels: map[string]string{"tier": "frontend"}}, + }, + nads: []*testNAD{}, // no UDN NADs + prePopulateCache: map[string]*clusterNetworkConnectState{ + "cnc1": { + 
name: "cnc1", + selectedNADs: sets.New[string](), + selectedNetworks: sets.New[string](), + }, + }, + reconcileNamespace: "default-ns", + expectNoReconcile: true, + }, + { + name: "namespace matching one of two CNCs triggers only that CNC", + cncs: []*testCNC{ + { + Name: "cnc1", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"tier": "frontend"}, + }, + }, + }, + }, + }, + { + Name: "cnc2", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"tier": "backend"}, + }, + }, + }, + }, + }, + }, + namespaces: []*testNamespace{ + {Name: "frontend-ns", Labels: map[string]string{"tier": "frontend"}}, + }, + nads: []*testNAD{ + { + Name: "primary-udn", + Namespace: "frontend-ns", + Network: "frontend-ns-network", + IsUDN: true, + IsPrimary: true, + Topology: types.Layer3Topology, + NetworkID: "1", + }, + }, + prePopulateCache: map[string]*clusterNetworkConnectState{ + "cnc1": { + name: "cnc1", + selectedNADs: sets.New[string](), + selectedNetworks: sets.New[string](), + }, + "cnc2": { + name: "cnc2", + selectedNADs: sets.New[string](), + selectedNetworks: sets.New[string](), + }, + }, + reconcileNamespace: "frontend-ns", + expectCNCReconciled: []string{"cnc1"}, // only cnc1 matches + }, + { + name: "deleted namespace does not panic and does not trigger CNC reconciliation", + cncs: []*testCNC{ + { + Name: "cnc1", + NetworkSelectors: []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"tier": "frontend"}, + }, + }, + }, + }, + }, + }, + namespaces: []*testNamespace{}, // namespace does not exist (deleted) + nads: []*testNAD{}, + prePopulateCache: map[string]*clusterNetworkConnectState{ + "cnc1": { + name: "cnc1", + selectedNADs: sets.New[string](), + selectedNetworks: sets.New[string](), + }, + }, + reconcileNamespace: "deleted-namespace", // namespace that doesn't exist + expectNoReconcile: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := gomega.NewWithT(t) + gMaxLength := format.MaxLength + format.MaxLength = 0 + defer func() { format.MaxLength = gMaxLength }() + + config.IPv4Mode = true + config.IPv6Mode = true + config.OVNKubernetesFeature.EnableMultiNetwork = true + config.OVNKubernetesFeature.EnableNetworkSegmentation = true + config.OVNKubernetesFeature.EnableNetworkConnect = true + + fakeClientset := util.GetOVNClientset().GetClusterManagerClientset() + ovntest.AddNetworkConnectApplyReactor(fakeClientset.NetworkConnectClient.(*networkconnectfake.Clientset)) + + // Create test CNCs + for _, cnc := range tt.cncs { + _, err := fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Create( + context.Background(), cnc.ClusterNetworkConnect(), metav1.CreateOptions{}) + g.Expect(err).ToNot(gomega.HaveOccurred()) + } + + // Create test NADs + for _, nad := range tt.nads { + _, err := fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(nad.Namespace).Create( + context.Background(), 
nad.NAD(), metav1.CreateOptions{}) + g.Expect(err).ToNot(gomega.HaveOccurred()) + } + + // Create test namespaces + for _, ns := range tt.namespaces { + _, err := fakeClientset.KubeClient.CoreV1().Namespaces().Create( + context.Background(), ns.Namespace(), metav1.CreateOptions{}) + g.Expect(err).ToNot(gomega.HaveOccurred()) + } + + wf, err := factory.NewClusterManagerWatchFactory(fakeClientset) + g.Expect(err).ToNot(gomega.HaveOccurred()) + + err = wf.Start() + g.Expect(err).ToNot(gomega.HaveOccurred()) + defer wf.Shutdown() + + // Wait for informer caches to sync + syncCtx, syncCancel := context.WithTimeout(context.Background(), 5*time.Second) + defer syncCancel() + synced := cache.WaitForCacheSync( + syncCtx.Done(), + wf.NADInformer().Informer().HasSynced, + wf.ClusterNetworkConnectInformer().Informer().HasSynced, + wf.NamespaceInformer().Informer().HasSynced, + ) + g.Expect(synced).To(gomega.BeTrue(), "informer caches should sync") + + // Create fake network manager and auto-configure from nads + fakeNM := &networkmanager.FakeNetworkManager{ + PrimaryNetworks: make(map[string]util.NetInfo), + } + + // Auto-populate PrimaryNetworks from NADs with IsUDN=true and IsPrimary=true + nadsByNamespace := make(map[string][]*testNAD) + for _, nad := range tt.nads { + if nad.IsUDN && nad.IsPrimary { + nadsByNamespace[nad.Namespace] = append(nadsByNamespace[nad.Namespace], nad) + } + } + for namespace, nads := range nadsByNamespace { + firstNAD := nads[0] + nadKey := fmt.Sprintf("%s/%s", firstNAD.Namespace, firstNAD.Name) + nad, err := wf.NADInformer().Lister().NetworkAttachmentDefinitions(namespace).Get(firstNAD.Name) + g.Expect(err).ToNot(gomega.HaveOccurred(), "NAD %s should exist", nadKey) + netInfo, err := util.ParseNADInfo(nad) + g.Expect(err).ToNot(gomega.HaveOccurred(), "NAD %s should be parseable", nadKey) + mutableNetInfo := util.NewMutableNetInfo(netInfo) + for _, n := range nads { + mutableNetInfo.AddNADs(fmt.Sprintf("%s/%s", n.Namespace, n.Name)) + } + fakeNM.PrimaryNetworks[namespace] = mutableNetInfo + } + + tunnelKeysAllocator := id.NewTunnelKeyAllocator("TunnelKeys") + c := NewController(wf, fakeClientset, fakeNM.Interface(), tunnelKeysAllocator) + + // Pre-populate cache + for name, state := range tt.prePopulateCache { + c.cncCache[name] = state + } + + // Track reconciled CNCs using a set to handle duplicate reconciles + reconciledCNCs := sets.New[string]() + reconciledMutex := sync.Mutex{} + + // Replace the CNC controller with a mock that tracks reconciliations + cncCfg := &controllerutil.ControllerConfig[networkconnectv1.ClusterNetworkConnect]{ + RateLimiter: workqueue.DefaultTypedControllerRateLimiter[string](), + Informer: wf.ClusterNetworkConnectInformer().Informer(), + Lister: wf.ClusterNetworkConnectInformer().Lister().List, + Reconcile: func(key string) error { + reconciledMutex.Lock() + defer reconciledMutex.Unlock() + reconciledCNCs.Insert(key) + return nil + }, + ObjNeedsUpdate: cncNeedsUpdate, + Threadiness: 1, + } + c.cncController = controllerutil.NewController( + "test-cnc-controller", + cncCfg, + ) + + err = controllerutil.Start(c.cncController) + g.Expect(err).ToNot(gomega.HaveOccurred()) + defer controllerutil.Stop(c.cncController) + + // Wait for initial controller sync to reconcile all CNCs, + // then clear the recorded reconciliations. 
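+ // As in TestController_reconcileNAD, the pre-existing CNCs are reconciled
+ // once by the initial informer sync; clearing the set afterwards means any
+ // key observed later can only have come from reconcileNamespace.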
+ g.Eventually(func() int { + reconciledMutex.Lock() + defer reconciledMutex.Unlock() + return reconciledCNCs.Len() + }).Should(gomega.BeNumerically(">=", len(tt.cncs))) + reconciledMutex.Lock() + reconciledCNCs = sets.New[string]() + reconciledMutex.Unlock() + + // Run namespace reconciliation + err = c.reconcileNamespace(tt.reconcileNamespace) + g.Expect(err).ToNot(gomega.HaveOccurred()) + + // Allow time for async reconciliation + if tt.expectNoReconcile { + g.Consistently(func() []string { + reconciledMutex.Lock() + defer reconciledMutex.Unlock() + return reconciledCNCs.UnsortedList() + }).Should(gomega.BeEmpty()) + } else { + g.Eventually(func() []string { + reconciledMutex.Lock() + defer reconciledMutex.Unlock() + return reconciledCNCs.UnsortedList() + }).Should(gomega.ConsistOf(tt.expectCNCReconciled)) + } + }) + } +} + +// expectedCNCCacheState represents the expected state of a CNC cache entry after initialSync +type expectedCNCCacheState struct { + tunnelID int + selectedNetworks []string +} + +// expectedSubnetAllocation represents an expected subnet allocation for verification +type expectedSubnetAllocation struct { + owner string + topology string // types.Layer3Topology or types.Layer2Topology + ipv4 string // expected IPv4 subnet CIDR + ipv6 string // expected IPv6 subnet CIDR (optional) +} + +// TestController_initialSync tests that initialSync correctly restores allocator state from CNC annotations. +// It verifies that tunnel keys and subnet allocations are restored, and that re-allocating the same owner +// returns the exact same subnet (idempotency). +func TestController_initialSync(t *testing.T) { + tests := []struct { + name string + // existingCNCs are CNC objects that exist before initialSync (with annotations set) + existingCNCs []*networkconnectv1.ClusterNetworkConnect + // expectCacheEntries maps CNC name to expected cache state after initialSync + expectCacheEntries map[string]expectedCNCCacheState + // verifyAllocations verifies that re-allocating the same owner returns exact same subnets + // Maps CNC name to list of expected allocations to verify + verifyAllocations map[string][]expectedSubnetAllocation + }{ + { + name: "restores single CNC with layer3 subnets", + existingCNCs: []*networkconnectv1.ClusterNetworkConnect{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "cnc1", + Annotations: map[string]string{ + util.OvnConnectRouterTunnelKeyAnnotation: "5", + // IPv6 /120 blocks within fd00:10:244::/112 range (256 /120 blocks available) + "k8s.ovn.org/network-connect-subnet": `{"layer3_1":{"ipv4":"192.168.0.0/24","ipv6":"fd00:10:244::/120"},"layer3_2":{"ipv4":"192.168.1.0/24","ipv6":"fd00:10:244::100/120"}}`, + }, + }, + Spec: networkconnectv1.ClusterNetworkConnectSpec{ + ConnectSubnets: []networkconnectv1.ConnectSubnet{ + {CIDR: "192.168.0.0/16", NetworkPrefix: 24}, + {CIDR: "fd00:10:244::/112", NetworkPrefix: 120}, + }, + Connectivity: []networkconnectv1.ConnectivityType{networkconnectv1.PodNetwork}, + }, + }, + }, + expectCacheEntries: map[string]expectedCNCCacheState{ + "cnc1": {tunnelID: 5, selectedNetworks: []string{"layer3_1", "layer3_2"}}, + }, + verifyAllocations: map[string][]expectedSubnetAllocation{ + "cnc1": { + {owner: "layer3_1", topology: types.Layer3Topology, ipv4: "192.168.0.0/24", ipv6: "fd00:10:244::/120"}, + {owner: "layer3_2", topology: types.Layer3Topology, ipv4: "192.168.1.0/24", ipv6: "fd00:10:244::100/120"}, + }, + }, + }, + { + name: "restores multiple CNCs with different subnets", + existingCNCs: 
[]*networkconnectv1.ClusterNetworkConnect{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "cnc1", + Annotations: map[string]string{ + util.OvnConnectRouterTunnelKeyAnnotation: "1", + // IPv6 /120 block within fd00:10:244::/112 range + "k8s.ovn.org/network-connect-subnet": `{"layer3_10":{"ipv4":"192.168.0.0/24","ipv6":"fd00:10:244::/120"}}`, + }, + }, + Spec: networkconnectv1.ClusterNetworkConnectSpec{ + ConnectSubnets: []networkconnectv1.ConnectSubnet{ + {CIDR: "192.168.0.0/16", NetworkPrefix: 24}, + {CIDR: "fd00:10:244::/112", NetworkPrefix: 120}, + }, + Connectivity: []networkconnectv1.ConnectivityType{networkconnectv1.PodNetwork}, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "cnc2", + Annotations: map[string]string{ + util.OvnConnectRouterTunnelKeyAnnotation: "2", + "k8s.ovn.org/network-connect-subnet": `{"layer3_20":{"ipv4":"10.100.0.0/24"},"layer3_21":{"ipv4":"10.100.1.0/24"}}`, + }, + }, + Spec: networkconnectv1.ClusterNetworkConnectSpec{ + ConnectSubnets: []networkconnectv1.ConnectSubnet{ + {CIDR: "10.100.0.0/16", NetworkPrefix: 24}, + }, + Connectivity: []networkconnectv1.ConnectivityType{networkconnectv1.PodNetwork}, + }, + }, + }, + expectCacheEntries: map[string]expectedCNCCacheState{ + "cnc1": {tunnelID: 1, selectedNetworks: []string{"layer3_10"}}, + "cnc2": {tunnelID: 2, selectedNetworks: []string{"layer3_20", "layer3_21"}}, + }, + verifyAllocations: map[string][]expectedSubnetAllocation{ + "cnc1": { + {owner: "layer3_10", topology: types.Layer3Topology, ipv4: "192.168.0.0/24", ipv6: "fd00:10:244::/120"}, + }, + "cnc2": { + {owner: "layer3_20", topology: types.Layer3Topology, ipv4: "10.100.0.0/24"}, + {owner: "layer3_21", topology: types.Layer3Topology, ipv4: "10.100.1.0/24"}, + }, + }, + }, + { + name: "restores CNC with layer2 subnets and pool blocks", + existingCNCs: []*networkconnectv1.ClusterNetworkConnect{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "cnc-layer2", + Annotations: map[string]string{ + util.OvnConnectRouterTunnelKeyAnnotation: "10", + "k8s.ovn.org/network-connect-subnet": `{"layer2_100":{"ipv4":"192.168.0.0/31","ipv6":"fd00:10:244::/127"},"layer2_101":{"ipv4":"192.168.0.2/31","ipv6":"fd00:10:244::2/127"}}`, + }, + }, + Spec: networkconnectv1.ClusterNetworkConnectSpec{ + ConnectSubnets: []networkconnectv1.ConnectSubnet{ + {CIDR: "192.168.0.0/16", NetworkPrefix: 24}, + {CIDR: "fd00:10:244::/112", NetworkPrefix: 120}, + }, + Connectivity: []networkconnectv1.ConnectivityType{networkconnectv1.PodNetwork}, + }, + }, + }, + expectCacheEntries: map[string]expectedCNCCacheState{ + "cnc-layer2": {tunnelID: 10, selectedNetworks: []string{"layer2_100", "layer2_101"}}, + }, + verifyAllocations: map[string][]expectedSubnetAllocation{ + "cnc-layer2": { + {owner: "layer2_100", topology: types.Layer2Topology, ipv4: "192.168.0.0/31", ipv6: "fd00:10:244::/127"}, + {owner: "layer2_101", topology: types.Layer2Topology, ipv4: "192.168.0.2/31", ipv6: "fd00:10:244::2/127"}, + }, + }, + }, + { + name: "restores CNC with mixed layer3 and layer2 subnets", + existingCNCs: []*networkconnectv1.ClusterNetworkConnect{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "cnc-mixed", + Annotations: map[string]string{ + util.OvnConnectRouterTunnelKeyAnnotation: "7", + // Layer3 gets /120 block, Layer2 gets /127 subnet within a /120 block + // IPv6 addresses must be within fd00:10:244::/112 range + "k8s.ovn.org/network-connect-subnet": `{"layer3_5":{"ipv4":"192.168.0.0/24","ipv6":"fd00:10:244::/120"},"layer2_6":{"ipv4":"192.168.1.0/31","ipv6":"fd00:10:244::100/127"}}`, + }, + }, + Spec: 
networkconnectv1.ClusterNetworkConnectSpec{ + ConnectSubnets: []networkconnectv1.ConnectSubnet{ + {CIDR: "192.168.0.0/16", NetworkPrefix: 24}, + {CIDR: "fd00:10:244::/112", NetworkPrefix: 120}, + }, + Connectivity: []networkconnectv1.ConnectivityType{networkconnectv1.PodNetwork}, + }, + }, + }, + expectCacheEntries: map[string]expectedCNCCacheState{ + "cnc-mixed": {tunnelID: 7, selectedNetworks: []string{"layer3_5", "layer2_6"}}, + }, + verifyAllocations: map[string][]expectedSubnetAllocation{ + "cnc-mixed": { + {owner: "layer3_5", topology: types.Layer3Topology, ipv4: "192.168.0.0/24", ipv6: "fd00:10:244::/120"}, + {owner: "layer2_6", topology: types.Layer2Topology, ipv4: "192.168.1.0/31", ipv6: "fd00:10:244::100/127"}, + }, + }, + }, + { + name: "handles CNC with empty annotations gracefully", + existingCNCs: []*networkconnectv1.ClusterNetworkConnect{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "cnc-empty", + }, + Spec: networkconnectv1.ClusterNetworkConnectSpec{ + ConnectSubnets: []networkconnectv1.ConnectSubnet{ + {CIDR: "192.168.0.0/16", NetworkPrefix: 24}, + }, + Connectivity: []networkconnectv1.ConnectivityType{networkconnectv1.PodNetwork}, + }, + }, + }, + expectCacheEntries: map[string]expectedCNCCacheState{ + "cnc-empty": {tunnelID: 0, selectedNetworks: []string{}}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := gomega.NewWithT(t) + + config.IPv4Mode = true + config.IPv6Mode = true + config.OVNKubernetesFeature.EnableMultiNetwork = true + config.OVNKubernetesFeature.EnableNetworkSegmentation = true + config.OVNKubernetesFeature.EnableNetworkConnect = true + + fakeClientset := util.GetOVNClientset().GetClusterManagerClientset() + ovntest.AddNetworkConnectApplyReactor(fakeClientset.NetworkConnectClient.(*networkconnectfake.Clientset)) + + // Create existing CNCs (simulating state from before restart) + for _, cnc := range tt.existingCNCs { + _, err := fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Create( + context.Background(), cnc, metav1.CreateOptions{}) + g.Expect(err).ToNot(gomega.HaveOccurred()) + } + + wf, err := factory.NewClusterManagerWatchFactory(fakeClientset) + g.Expect(err).ToNot(gomega.HaveOccurred()) + + err = wf.Start() + g.Expect(err).ToNot(gomega.HaveOccurred()) + defer wf.Shutdown() + + // Wait for informer caches to sync + syncCtx, syncCancel := context.WithTimeout(context.Background(), 5*time.Second) + defer syncCancel() + synced := cache.WaitForCacheSync( + syncCtx.Done(), + wf.ClusterNetworkConnectInformer().Informer().HasSynced, + ) + g.Expect(synced).To(gomega.BeTrue(), "informer caches should sync") + + fakeNM := &networkmanager.FakeNetworkManager{ + PrimaryNetworks: make(map[string]util.NetInfo), + } + + tunnelKeysAllocator := id.NewTunnelKeyAllocator("TunnelKeys") + c := NewController(wf, fakeClientset, fakeNM.Interface(), tunnelKeysAllocator) + + // Run initialSync + err = c.initialSync() + g.Expect(err).ToNot(gomega.HaveOccurred()) + + // Verify cache entries + for cncName, expected := range tt.expectCacheEntries { + cncState, exists := c.cncCache[cncName] + g.Expect(exists).To(gomega.BeTrue(), "cache entry for %s should exist", cncName) + + // Verify tunnel ID + g.Expect(cncState.tunnelID).To(gomega.Equal(expected.tunnelID), + "tunnel ID for %s should match", cncName) + + // Verify selected networks + g.Expect(cncState.selectedNetworks.UnsortedList()).To(gomega.ConsistOf(expected.selectedNetworks), + "selected networks for %s should match", cncName) + + // Verify allocator was populated 
(if there are subnets) + if len(expected.selectedNetworks) > 0 { + // The allocator should have the ranges configured + // We verify this indirectly by checking that it exists and was set up + g.Expect(cncState.allocator).ToNot(gomega.BeNil(), + "allocator for %s should be initialized", cncName) + } + } + + // Verify that re-allocating the same owner returns exact same subnets (idempotency) + for cncName, allocations := range tt.verifyAllocations { + cncState := c.cncCache[cncName] + g.Expect(cncState).ToNot(gomega.BeNil(), "cache entry for %s should exist", cncName) + g.Expect(cncState.allocator).ToNot(gomega.BeNil(), "allocator for %s should exist", cncName) + + for _, expected := range allocations { + var allocatedSubnets []*net.IPNet + var err error + + if expected.topology == types.Layer3Topology { + allocatedSubnets, err = cncState.allocator.AllocateLayer3Subnet(expected.owner) + } else { + allocatedSubnets, err = cncState.allocator.AllocateLayer2Subnet(expected.owner) + } + g.Expect(err).ToNot(gomega.HaveOccurred(), + "re-allocation for owner %s should succeed", expected.owner) + g.Expect(allocatedSubnets).ToNot(gomega.BeEmpty(), + "re-allocation for owner %s should return subnets", expected.owner) + + // Build map of allocated subnets by type for comparison + allocatedByType := make(map[string]string) // "ipv4" or "ipv6" -> CIDR + for _, subnet := range allocatedSubnets { + if subnet.IP.To4() != nil { + allocatedByType["ipv4"] = subnet.String() + } else { + allocatedByType["ipv6"] = subnet.String() + } + } + + // Verify exact match of subnet values + if expected.ipv4 != "" { + g.Expect(allocatedByType["ipv4"]).To(gomega.Equal(expected.ipv4), + "owner %s: IPv4 subnet should match exactly", expected.owner) + } + if expected.ipv6 != "" { + g.Expect(allocatedByType["ipv6"]).To(gomega.Equal(expected.ipv6), + "owner %s: IPv6 subnet should match exactly", expected.owner) + } + } + } + }) + } +} diff --git a/go-controller/pkg/clustermanager/networkconnect/controller_suite_test.go b/go-controller/pkg/clustermanager/networkconnect/controller_suite_test.go new file mode 100644 index 0000000000..1e84c5a5ed --- /dev/null +++ b/go-controller/pkg/clustermanager/networkconnect/controller_suite_test.go @@ -0,0 +1,13 @@ +package networkconnect + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +func TestNetworkConnectController(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "NetworkConnect Controller Suite") +} diff --git a/go-controller/pkg/clustermanager/networkconnect/controller_test.go b/go-controller/pkg/clustermanager/networkconnect/controller_test.go new file mode 100644 index 0000000000..28f531731d --- /dev/null +++ b/go-controller/pkg/clustermanager/networkconnect/controller_test.go @@ -0,0 +1,1591 @@ +package networkconnect + +import ( + "context" + "encoding/json" + "fmt" + "time" + + nadv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + "github.com/urfave/cli/v2" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/id" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + networkconnectv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/clusternetworkconnect/v1" + networkconnectfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/clusternetworkconnect/v1/apis/clientset/versioned/fake" + apitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/types" + userdefinednetworkv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/networkmanager" + ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" +) + +// NOTE: This file tests the full controller in an integrated fashion. +// Of course the applyReactor is used to mock the k8s api server. + +const ovnNetworkConnectSubnetAnnotation = "k8s.ovn.org/network-connect-subnet" + +// ============================================================================= +// Shared test helpers for creating test objects +// ============================================================================= + +// newTestCNC creates a test CNC object with the given name, selectors, and connect subnets. +// If connectSubnets is nil, it defaults to 192.168.0.0/16 with /24 prefix. +func newTestCNC(name string, selectors []apitypes.NetworkSelector, connectSubnets []networkconnectv1.ConnectSubnet) *networkconnectv1.ClusterNetworkConnect { + if connectSubnets == nil { + connectSubnets = []networkconnectv1.ConnectSubnet{ + {CIDR: "192.168.0.0/16", NetworkPrefix: 24}, + } + } + return &networkconnectv1.ClusterNetworkConnect{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + Spec: networkconnectv1.ClusterNetworkConnectSpec{ + NetworkSelectors: selectors, + ConnectSubnets: connectSubnets, + Connectivity: []networkconnectv1.ConnectivityType{ + networkconnectv1.PodNetwork, + }, + }, + } +} + +// newTestUDNNAD creates a test NAD owned by a UserDefinedNetwork. 
+func newTestUDNNAD(name, namespace, network string, networkID string) *nadv1.NetworkAttachmentDefinition { + return &nadv1.NetworkAttachmentDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Annotations: map[string]string{ + types.OvnNetworkNameAnnotation: network, + types.OvnNetworkIDAnnotation: networkID, + }, + OwnerReferences: []metav1.OwnerReference{makeUDNOwnerRef(name)}, + }, + Spec: nadv1.NetworkAttachmentDefinitionSpec{ + Config: fmt.Sprintf( + `{"cniVersion": "0.4.0", "name": "%s", "type": "%s", "topology": "layer3", "netAttachDefName": "%s/%s", "role": "primary", "subnets": "10.0.0.0/16/24"}`, + network, + config.CNI.Plugin, + namespace, + name, + ), + }, + } +} + +// newTestCUDNNAD creates a test NAD owned by a ClusterUserDefinedNetwork. +func newTestCUDNNAD(name, namespace, network string, labels map[string]string, networkID string) *nadv1.NetworkAttachmentDefinition { + nad := newTestUDNNAD(name, namespace, network, networkID) + nad.Labels = labels + nad.OwnerReferences = []metav1.OwnerReference{makeCUDNOwnerRef(network)} + return nad +} + +// newTestNamespace creates a test namespace with the given name and labels. +func newTestNamespace(name string, labels map[string]string) *corev1.Namespace { + return &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Labels: labels, + }, + } +} + +var _ = ginkgo.Describe("NetworkConnect ClusterManager Controller Integration Tests", func() { + var ( + app *cli.App + controller *Controller + fakeClientset *util.OVNClusterManagerClientset + fakeNM *networkmanager.FakeNetworkManager + wf *factory.WatchFactory + ) + + // start initializes the controller with pre-populated objects (standard k8s objects only) + start := func(objects ...runtime.Object) { + fakeClientset = util.GetOVNClientset(objects...).GetClusterManagerClientset() + ovntest.AddNetworkConnectApplyReactor(fakeClientset.NetworkConnectClient.(*networkconnectfake.Clientset)) + + var err error + wf, err = factory.NewClusterManagerWatchFactory(fakeClientset) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeNM = &networkmanager.FakeNetworkManager{ + PrimaryNetworks: make(map[string]util.NetInfo), + } + + tunnelKeysAllocator := id.NewTunnelKeyAllocator("TunnelKeys") + controller = NewController(wf, fakeClientset, fakeNM.Interface(), tunnelKeysAllocator) + + err = wf.Start() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + err = controller.Start() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + } + + // Local aliases for shared helpers (for cleaner test code) + testCNC := func(name string, selectors []apitypes.NetworkSelector) *networkconnectv1.ClusterNetworkConnect { + return newTestCNC(name, selectors, nil) + } + testCUDNNAD := newTestCUDNNAD + testUDNNAD := newTestUDNNAD + + // Helper to get CNC annotations + getCNCAnnotations := func(cncName string) (map[string]string, error) { + cnc, err := fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Get( + context.Background(), cncName, metav1.GetOptions{}) + if err != nil { + return nil, err + } + return cnc.Annotations, nil + } + + // Helper to verify CNC has tunnel ID annotation + hasTunnelIDAnnotation := func(cncName string) bool { + annotations, err := getCNCAnnotations(cncName) + if err != nil { + return false + } + _, exists := annotations[util.OvnConnectRouterTunnelKeyAnnotation] + return exists + } + + // Helper to verify CNC has non-empty subnet annotation + hasNonEmptySubnetAnnotation := func(cncName string) bool { + annotations, err := 
getCNCAnnotations(cncName) + if err != nil { + return false + } + subnetAnnotation, exists := annotations[ovnNetworkConnectSubnetAnnotation] + if !exists { + return false + } + return subnetAnnotation != "{}" + } + + // Helper to get subnet annotation network count + getSubnetAnnotationNetworkCount := func(cncName string) int { + annotations, err := getCNCAnnotations(cncName) + if err != nil { + return -1 + } + subnetAnnotation, exists := annotations[ovnNetworkConnectSubnetAnnotation] + if !exists { + return 0 + } + if subnetAnnotation == "{}" { + return 0 + } + var subnets map[string]util.NetworkConnectSubnetAnnotation + if err := json.Unmarshal([]byte(subnetAnnotation), &subnets); err != nil { + return -1 + } + return len(subnets) + } + + ginkgo.BeforeEach(func() { + err := config.PrepareTestConfig() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + config.IPv4Mode = true + config.IPv6Mode = false + config.OVNKubernetesFeature.EnableMultiNetwork = true + config.OVNKubernetesFeature.EnableNetworkConnect = true + config.OVNKubernetesFeature.EnableNetworkSegmentation = true + app = cli.NewApp() + app.Name = "test" + app.Flags = config.Flags + }) + + ginkgo.AfterEach(func() { + if controller != nil { + controller.Stop() + } + if wf != nil { + wf.Shutdown() + } + }) + + ginkgo.Context("ClusterNetworkConnect ClusterManager Annotation Tests", func() { + + ginkgo.It("1. CNC created with 0 matching networks only has tunnel ID annotation", func() { + app.Action = func(*cli.Context) error { + cncName := "test-cnc-no-networks" + + start() // No pre-populated objects + + // Create CNC via client + cnc := testCNC(cncName, []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"nonexistent": "label"}, + }, + }, + }, + }) + _, err := fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Create( + context.Background(), cnc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Wait for CNC to have tunnel ID annotation + gomega.Eventually(func() bool { + return hasTunnelIDAnnotation(cncName) + }).WithTimeout(5 * time.Second).Should(gomega.BeTrue()) + + // Verify no subnet annotation + gomega.Expect(hasNonEmptySubnetAnnotation(cncName)).To(gomega.BeFalse()) + + return nil + } + err := app.Run([]string{app.Name}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + }) + + ginkgo.It("2. 
CNC created with matching P-CUDNs has both subnet and tunnel ID annotations", func() { + app.Action = func(*cli.Context) error { + cncName := "test-cnc-cudn" + testLabel := map[string]string{"test-cudn": "true"} + cudnNetwork := util.GenerateCUDNNetworkName("test-cudn") + + start() // Start with no pre-populated objects + + // Create NAD first + nad := testCUDNNAD("cudn-red", "red", cudnNetwork, testLabel, "1") + _, err := fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("red").Create( + context.Background(), nad, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Create CNC + cnc := testCNC(cncName, []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: testLabel, + }, + }, + }, + }) + _, err = fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Create( + context.Background(), cnc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Wait for CNC to have both annotations + gomega.Eventually(func() bool { + return hasTunnelIDAnnotation(cncName) && hasNonEmptySubnetAnnotation(cncName) + }).WithTimeout(5 * time.Second).Should(gomega.BeTrue()) + + // Verify subnet annotation has 1 network + gomega.Expect(getSubnetAnnotationNetworkCount(cncName)).To(gomega.Equal(1)) + + return nil + } + err := app.Run([]string{app.Name}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + }) + + ginkgo.It("3. CNC selects multiple CUDNs matching label selector", func() { + app.Action = func(*cli.Context) error { + cncName := "test-cnc-multi" + testLabel := map[string]string{"env": "test"} + network1 := util.GenerateCUDNNetworkName("blue") + network2 := util.GenerateCUDNNetworkName("green") + + start() + + // Create NADs + nad1 := testCUDNNAD("cudn-blue", "blue", network1, testLabel, "1") + _, err := fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("blue").Create( + context.Background(), nad1, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + nad2 := testCUDNNAD("cudn-green", "green", network2, testLabel, "2") + _, err = fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("green").Create( + context.Background(), nad2, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // This one should NOT be selected (different label) + nad3 := testCUDNNAD("cudn-yellow", "yellow", util.GenerateCUDNNetworkName("yellow"), map[string]string{"env": "prod"}, "3") + _, err = fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("yellow").Create( + context.Background(), nad3, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Create CNC + cnc := testCNC(cncName, []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: testLabel, + }, + }, + }, + }) + _, err = fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Create( + context.Background(), cnc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Wait for CNC to have both annotations with 2 networks + gomega.Eventually(func() int { + return getSubnetAnnotationNetworkCount(cncName) + }).WithTimeout(5 * 
time.Second).Should(gomega.Equal(2)) + + return nil + } + err := app.Run([]string{app.Name}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + }) + + ginkgo.It("4. CNC annotations updated when new NAD is created that matches selector", func() { + app.Action = func(*cli.Context) error { + cncName := "test-cnc-nad-create" + testLabel := map[string]string{"test-create": "true"} + network1 := util.GenerateCUDNNetworkName("create1") + + start() + + // Create first NAD + nad1 := testCUDNNAD("cudn-create1", "create1-ns", network1, testLabel, "1") + _, err := fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("create1-ns").Create( + context.Background(), nad1, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Create CNC + cnc := testCNC(cncName, []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: testLabel, + }, + }, + }, + }) + _, err = fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Create( + context.Background(), cnc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Wait for initial state with 1 network + gomega.Eventually(func() int { + return getSubnetAnnotationNetworkCount(cncName) + }).WithTimeout(5 * time.Second).Should(gomega.Equal(1)) + + // Create a second NAD that matches the selector + network2 := util.GenerateCUDNNetworkName("create2") + nad2 := testCUDNNAD("cudn-create2", "create2-ns", network2, testLabel, "2") + _, err = fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("create2-ns").Create( + context.Background(), nad2, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Wait for CNC to be updated with 2 networks + gomega.Eventually(func() int { + return getSubnetAnnotationNetworkCount(cncName) + }).WithTimeout(5 * time.Second).Should(gomega.Equal(2)) + + return nil + } + err := app.Run([]string{app.Name}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + }) + + ginkgo.It("5. 
CNC annotations updated when matching NAD is deleted", func() { + app.Action = func(*cli.Context) error { + cncName := "test-cnc-nad-delete" + testLabel := map[string]string{"test-delete": "true"} + network1 := util.GenerateCUDNNetworkName("del1") + network2 := util.GenerateCUDNNetworkName("del2") + + start() + + // Create NADs + nad1 := testCUDNNAD("cudn-del1", "del1-ns", network1, testLabel, "1") + _, err := fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("del1-ns").Create( + context.Background(), nad1, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + nad2 := testCUDNNAD("cudn-del2", "del2-ns", network2, testLabel, "2") + _, err = fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("del2-ns").Create( + context.Background(), nad2, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Create CNC + cnc := testCNC(cncName, []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: testLabel, + }, + }, + }, + }) + _, err = fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Create( + context.Background(), cnc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Wait for initial state with 2 networks + gomega.Eventually(func() int { + return getSubnetAnnotationNetworkCount(cncName) + }).WithTimeout(5 * time.Second).Should(gomega.Equal(2)) + + // Delete one NAD + err = fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("del2-ns").Delete( + context.Background(), "cudn-del2", metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Wait for CNC to be updated with 1 network + gomega.Eventually(func() int { + return getSubnetAnnotationNetworkCount(cncName) + }).WithTimeout(5 * time.Second).Should(gomega.Equal(1)) + + return nil + } + err := app.Run([]string{app.Name}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + }) + + ginkgo.It("6. 
CNC annotations updated when NAD label changes to match selector", func() { + app.Action = func(*cli.Context) error { + cncName := "test-cnc-label-match" + matchingLabel := map[string]string{"test-label": "true"} + nonMatchingLabel := map[string]string{"test-label": "false"} + network := util.GenerateCUDNNetworkName("label-match") + + start() + + // Create CNC first + cnc := testCNC(cncName, []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: matchingLabel, + }, + }, + }, + }) + _, err := fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Create( + context.Background(), cnc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Wait for CNC to have tunnel ID (no matching NADs yet) + gomega.Eventually(func() bool { + return hasTunnelIDAnnotation(cncName) + }).WithTimeout(5 * time.Second).Should(gomega.BeTrue()) + gomega.Expect(hasNonEmptySubnetAnnotation(cncName)).To(gomega.BeFalse()) + + // Create NAD with non-matching label + nad := testCUDNNAD("cudn-label", "label-ns", network, nonMatchingLabel, "1") + _, err = fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("label-ns").Create( + context.Background(), nad, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Still no subnet annotation + gomega.Consistently(func() bool { + return hasNonEmptySubnetAnnotation(cncName) + }).WithTimeout(1 * time.Second).Should(gomega.BeFalse()) + + // Update NAD label to match + updatedNAD, err := fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("label-ns").Get( + context.Background(), "cudn-label", metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + updatedNAD.Labels = matchingLabel + _, err = fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("label-ns").Update( + context.Background(), updatedNAD, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Wait for CNC to have subnet annotation + gomega.Eventually(func() bool { + return hasNonEmptySubnetAnnotation(cncName) + }).WithTimeout(5 * time.Second).Should(gomega.BeTrue()) + + return nil + } + err := app.Run([]string{app.Name}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + }) + + ginkgo.It("7. 
CNC annotations updated when NAD label changes to stop matching selector", func() { + app.Action = func(*cli.Context) error { + cncName := "test-cnc-label-unmatch" + matchingLabel := map[string]string{"test-label": "true"} + network := util.GenerateCUDNNetworkName("label-unmatch") + + start() + + // Create NAD with matching label + nad := testCUDNNAD("cudn-unlabel", "unlabel-ns", network, matchingLabel, "1") + _, err := fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("unlabel-ns").Create( + context.Background(), nad, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Create CNC + cnc := testCNC(cncName, []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: matchingLabel, + }, + }, + }, + }) + _, err = fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Create( + context.Background(), cnc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Wait for CNC to have subnet annotation + gomega.Eventually(func() bool { + return hasNonEmptySubnetAnnotation(cncName) + }).WithTimeout(5 * time.Second).Should(gomega.BeTrue()) + + // Update NAD label to no longer match + updatedNAD, err := fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("unlabel-ns").Get( + context.Background(), "cudn-unlabel", metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + updatedNAD.Labels = map[string]string{"test-label": "false"} + _, err = fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("unlabel-ns").Update( + context.Background(), updatedNAD, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Wait for CNC subnet annotation to become empty + gomega.Eventually(func() int { + return getSubnetAnnotationNetworkCount(cncName) + }).WithTimeout(5 * time.Second).Should(gomega.Equal(0)) + + // Should still have tunnel ID + gomega.Expect(hasTunnelIDAnnotation(cncName)).To(gomega.BeTrue()) + + return nil + } + err := app.Run([]string{app.Name}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + }) + + ginkgo.It("8. 
CNC ignores secondary network NADs", func() { + app.Action = func(*cli.Context) error { + cncName := "test-cnc-secondary" + testLabel := map[string]string{"test-secondary": "true"} + network := util.GenerateCUDNNetworkName("secondary") + + start() + + // Create a secondary NAD (no "role": "primary" in config) + secondaryNAD := &nadv1.NetworkAttachmentDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cudn-secondary", + Namespace: "secondary-ns", + Labels: testLabel, + Annotations: map[string]string{ + types.OvnNetworkNameAnnotation: network, + types.OvnNetworkIDAnnotation: "1", + }, + OwnerReferences: []metav1.OwnerReference{ + *metav1.NewControllerRef( + &metav1.ObjectMeta{Name: network}, + userdefinednetworkv1.SchemeGroupVersion.WithKind("ClusterUserDefinedNetwork"), + ), + }, + }, + Spec: nadv1.NetworkAttachmentDefinitionSpec{ + Config: fmt.Sprintf( + `{"cniVersion": "0.4.0", "name": "%s", "type": "%s", "topology": "layer3", "netAttachDefName": "secondary-ns/cudn-secondary", "subnets": "10.0.0.0/16/24"}`, + network, + config.CNI.Plugin, + ), + }, + } + _, err := fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("secondary-ns").Create( + context.Background(), secondaryNAD, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Create CNC + cnc := testCNC(cncName, []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: testLabel, + }, + }, + }, + }) + _, err = fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Create( + context.Background(), cnc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Wait for CNC to have tunnel ID + gomega.Eventually(func() bool { + return hasTunnelIDAnnotation(cncName) + }).WithTimeout(5 * time.Second).Should(gomega.BeTrue()) + + // Should NOT have subnet annotation (secondary NAD should be ignored) + gomega.Expect(hasNonEmptySubnetAnnotation(cncName)).To(gomega.BeFalse()) + + return nil + } + err := app.Run([]string{app.Name}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + }) + + // Primary UDN selector tests + ginkgo.It("9. 
CNC with Primary UDN selector matches namespace with primary UDN", func() { + app.Action = func(*cli.Context) error { + cncName := "test-cnc-udn" + nsName := "udn-ns" + nadName := "primary-udn" + network := util.GenerateUDNNetworkName(nsName, nadName) + + start() + + // Create namespace with matching label and RequiredUDNNamespaceLabel + ns := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: nsName, + Labels: map[string]string{ + "udn-enabled": "true", + types.RequiredUDNNamespaceLabel: "", + }, + }, + } + _, err := fakeClientset.KubeClient.CoreV1().Namespaces().Create( + context.Background(), ns, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Create UDN NAD + nad := testUDNNAD(nadName, nsName, network, "1") + _, err = fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(nsName).Create( + context.Background(), nad, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Configure FakeNetworkManager to return this NAD as primary for the namespace + netInfo, err := util.ParseNADInfo(nad) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + mutableNetInfo := util.NewMutableNetInfo(netInfo) + mutableNetInfo.AddNADs(nsName + "/" + nadName) + fakeNM.PrimaryNetworks[nsName] = mutableNetInfo + + // Create CNC with Primary UDN selector + cnc := testCNC(cncName, []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"udn-enabled": "true"}, + }, + }, + }, + }) + _, err = fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Create( + context.Background(), cnc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Wait for CNC to have both annotations + gomega.Eventually(func() bool { + return hasTunnelIDAnnotation(cncName) && hasNonEmptySubnetAnnotation(cncName) + }).WithTimeout(5 * time.Second).Should(gomega.BeTrue()) + + // Verify subnet annotation has 1 network + gomega.Expect(getSubnetAnnotationNetworkCount(cncName)).To(gomega.Equal(1)) + + return nil + } + err := app.Run([]string{app.Name}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + }) + + ginkgo.It("10. 
CNC selects multiple Primary UDNs from multiple namespaces", func() { + app.Action = func(*cli.Context) error { + cncName := "test-cnc-multi-udn" + + start() + + // Create first namespace and UDN + ns1 := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "frontend-a", + Labels: map[string]string{ + "tier": "frontend", + types.RequiredUDNNamespaceLabel: "", + }, + }, + } + _, err := fakeClientset.KubeClient.CoreV1().Namespaces().Create( + context.Background(), ns1, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + network1 := util.GenerateUDNNetworkName("frontend-a", "primary-udn") + nad1 := testUDNNAD("primary-udn", "frontend-a", network1, "1") + _, err = fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("frontend-a").Create( + context.Background(), nad1, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + netInfo1, err := util.ParseNADInfo(nad1) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + mutableNetInfo1 := util.NewMutableNetInfo(netInfo1) + mutableNetInfo1.AddNADs("frontend-a/primary-udn") + fakeNM.PrimaryNetworks["frontend-a"] = mutableNetInfo1 + + // Create second namespace and UDN + ns2 := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "frontend-b", + Labels: map[string]string{ + "tier": "frontend", + types.RequiredUDNNamespaceLabel: "", + }, + }, + } + _, err = fakeClientset.KubeClient.CoreV1().Namespaces().Create( + context.Background(), ns2, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + network2 := util.GenerateUDNNetworkName("frontend-b", "primary-udn") + nad2 := testUDNNAD("primary-udn", "frontend-b", network2, "2") + _, err = fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("frontend-b").Create( + context.Background(), nad2, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + netInfo2, err := util.ParseNADInfo(nad2) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + mutableNetInfo2 := util.NewMutableNetInfo(netInfo2) + mutableNetInfo2.AddNADs("frontend-b/primary-udn") + fakeNM.PrimaryNetworks["frontend-b"] = mutableNetInfo2 + + // Create a non-matching namespace + ns3 := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "backend", + Labels: map[string]string{"tier": "backend"}, + }, + } + _, err = fakeClientset.KubeClient.CoreV1().Namespaces().Create( + context.Background(), ns3, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Create CNC + cnc := testCNC(cncName, []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"tier": "frontend"}, + }, + }, + }, + }) + _, err = fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Create( + context.Background(), cnc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Wait for CNC to have 2 networks in subnet annotation + gomega.Eventually(func() int { + return getSubnetAnnotationNetworkCount(cncName) + }).WithTimeout(5 * time.Second).Should(gomega.Equal(2)) + + return nil + } + err := app.Run([]string{app.Name}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + }) + + ginkgo.It("11. 
CNC updated when namespace label changes to match Primary UDN selector", func() { + app.Action = func(*cli.Context) error { + cncName := "test-cnc-ns-label-match" + nsName := "label-change-ns" + nadName := "primary-udn" + network := util.GenerateUDNNetworkName(nsName, nadName) + + start() + + // Create CNC first + cnc := testCNC(cncName, []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"selected": "true"}, + }, + }, + }, + }) + _, err := fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Create( + context.Background(), cnc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Wait for tunnel ID (no matching namespaces yet) + gomega.Eventually(func() bool { + return hasTunnelIDAnnotation(cncName) + }).WithTimeout(5 * time.Second).Should(gomega.BeTrue()) + gomega.Expect(hasNonEmptySubnetAnnotation(cncName)).To(gomega.BeFalse()) + + // Create namespace with non-matching selector label but with RequiredUDNNamespaceLabel + ns := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: nsName, + Labels: map[string]string{ + "selected": "false", + types.RequiredUDNNamespaceLabel: "", + }, + }, + } + _, err = fakeClientset.KubeClient.CoreV1().Namespaces().Create( + context.Background(), ns, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Create UDN NAD + nad := testUDNNAD(nadName, nsName, network, "1") + _, err = fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(nsName).Create( + context.Background(), nad, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Configure FakeNetworkManager + netInfo, err := util.ParseNADInfo(nad) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + mutableNetInfo := util.NewMutableNetInfo(netInfo) + mutableNetInfo.AddNADs(nsName + "/" + nadName) + fakeNM.PrimaryNetworks[nsName] = mutableNetInfo + + // Still no subnet annotation + gomega.Consistently(func() bool { + return hasNonEmptySubnetAnnotation(cncName) + }).WithTimeout(1 * time.Second).Should(gomega.BeFalse()) + + // Update namespace label to match (keep RequiredUDNNamespaceLabel) + updatedNS, err := fakeClientset.KubeClient.CoreV1().Namespaces().Get( + context.Background(), nsName, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + updatedNS.Labels = map[string]string{ + "selected": "true", + types.RequiredUDNNamespaceLabel: "", + } + _, err = fakeClientset.KubeClient.CoreV1().Namespaces().Update( + context.Background(), updatedNS, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Wait for CNC to have subnet annotation + gomega.Eventually(func() bool { + return hasNonEmptySubnetAnnotation(cncName) + }).WithTimeout(5 * time.Second).Should(gomega.BeTrue()) + + return nil + } + err := app.Run([]string{app.Name}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + }) + + ginkgo.It("12. 
CNC updated when namespace label changes to stop matching Primary UDN selector", func() { + app.Action = func(*cli.Context) error { + cncName := "test-cnc-ns-unmatch" + nsName := "unmatch-ns" + nadName := "primary-udn" + network := util.GenerateUDNNetworkName(nsName, nadName) + + start() + + // Create namespace with matching label + ns := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: nsName, + Labels: map[string]string{ + "selected": "true", + types.RequiredUDNNamespaceLabel: "", + }, + }, + } + _, err := fakeClientset.KubeClient.CoreV1().Namespaces().Create( + context.Background(), ns, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Create UDN NAD + nad := testUDNNAD(nadName, nsName, network, "1") + _, err = fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(nsName).Create( + context.Background(), nad, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Configure FakeNetworkManager + netInfo, err := util.ParseNADInfo(nad) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + mutableNetInfo := util.NewMutableNetInfo(netInfo) + mutableNetInfo.AddNADs(nsName + "/" + nadName) + fakeNM.PrimaryNetworks[nsName] = mutableNetInfo + + // Create CNC with Primary UDN selector + cnc := testCNC(cncName, []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"selected": "true"}, + }, + }, + }, + }) + _, err = fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Create( + context.Background(), cnc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Wait for CNC to have subnet annotation (namespace matches) + gomega.Eventually(func() bool { + return hasNonEmptySubnetAnnotation(cncName) + }).WithTimeout(5 * time.Second).Should(gomega.BeTrue()) + + // Update namespace label to stop matching (keep RequiredUDNNamespaceLabel) + updatedNS, err := fakeClientset.KubeClient.CoreV1().Namespaces().Get( + context.Background(), nsName, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + updatedNS.Labels = map[string]string{ + "selected": "false", // no longer matches + types.RequiredUDNNamespaceLabel: "", + } + _, err = fakeClientset.KubeClient.CoreV1().Namespaces().Update( + context.Background(), updatedNS, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Wait for CNC subnet annotation to become empty + gomega.Eventually(func() int { + return getSubnetAnnotationNetworkCount(cncName) + }).WithTimeout(5 * time.Second).Should(gomega.Equal(0)) + + // Should still have tunnel ID + gomega.Expect(hasTunnelIDAnnotation(cncName)).To(gomega.BeTrue()) + + return nil + } + err := app.Run([]string{app.Name}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + }) + + ginkgo.It("13. 
CNC ignores namespace without primary UDN even if label matches", func() { + app.Action = func(*cli.Context) error { + cncName := "test-cnc-no-udn" + + start() + + // Create namespace with matching label but no UDN + // Note: we don't add RequiredUDNNamespaceLabel since this namespace has no UDN + ns := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "no-udn-ns", + Labels: map[string]string{"selected": "true"}, + }, + } + _, err := fakeClientset.KubeClient.CoreV1().Namespaces().Create( + context.Background(), ns, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Don't configure FakeNetworkManager - no primary network for this namespace + + // Create CNC + cnc := testCNC(cncName, []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"selected": "true"}, + }, + }, + }, + }) + _, err = fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Create( + context.Background(), cnc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Wait for tunnel ID + gomega.Eventually(func() bool { + return hasTunnelIDAnnotation(cncName) + }).WithTimeout(5 * time.Second).Should(gomega.BeTrue()) + + // Should NOT have subnet annotation (namespace has no primary UDN) + gomega.Expect(hasNonEmptySubnetAnnotation(cncName)).To(gomega.BeFalse()) + + return nil + } + err := app.Run([]string{app.Name}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + }) + + ginkgo.It("14. CNC selector update causes networks to start matching", func() { + app.Action = func(*cli.Context) error { + cncName := "test-cnc-selector-match" + cudnNetwork := util.GenerateCUDNNetworkName("selector-test") + + start() + + // Create NAD with specific label + nad := testCUDNNAD("cudn-selector", "selector-ns", cudnNetwork, map[string]string{"env": "prod"}, "1") + _, err := fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("selector-ns").Create( + context.Background(), nad, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Create CNC with non-matching selector + cnc := testCNC(cncName, []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"env": "dev"}, // doesn't match "prod" + }, + }, + }, + }) + _, err = fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Create( + context.Background(), cnc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Wait for tunnel ID (no matching networks yet) + gomega.Eventually(func() bool { + return hasTunnelIDAnnotation(cncName) + }).WithTimeout(5 * time.Second).Should(gomega.BeTrue()) + gomega.Expect(hasNonEmptySubnetAnnotation(cncName)).To(gomega.BeFalse()) + + // Update CNC selector to match the NAD + updatedCNC, err := fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Get( + context.Background(), cncName, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + updatedCNC.Spec.NetworkSelectors = []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: 
metav1.LabelSelector{ + MatchLabels: map[string]string{"env": "prod"}, // now matches + }, + }, + }, + } + _, err = fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Update( + context.Background(), updatedCNC, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Wait for CNC to have subnet annotation + gomega.Eventually(func() bool { + return hasNonEmptySubnetAnnotation(cncName) + }).WithTimeout(5 * time.Second).Should(gomega.BeTrue()) + + return nil + } + err := app.Run([]string{app.Name}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + }) + + ginkgo.It("15. CNC selector update causes networks to stop matching", func() { + app.Action = func(*cli.Context) error { + cncName := "test-cnc-selector-unmatch" + cudnNetwork := util.GenerateCUDNNetworkName("selector-unmatch") + + start() + + // Create NAD with specific label + nad := testCUDNNAD("cudn-unmatch", "unmatch-ns", cudnNetwork, map[string]string{"env": "prod"}, "1") + _, err := fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("unmatch-ns").Create( + context.Background(), nad, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Create CNC with matching selector + cnc := testCNC(cncName, []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"env": "prod"}, // matches + }, + }, + }, + }) + _, err = fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Create( + context.Background(), cnc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Wait for CNC to have subnet annotation + gomega.Eventually(func() bool { + return hasNonEmptySubnetAnnotation(cncName) + }).WithTimeout(5 * time.Second).Should(gomega.BeTrue()) + + // Update CNC selector to no longer match + updatedCNC, err := fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Get( + context.Background(), cncName, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + updatedCNC.Spec.NetworkSelectors = []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"env": "dev"}, // no longer matches "prod" + }, + }, + }, + } + _, err = fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Update( + context.Background(), updatedCNC, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Wait for CNC subnet annotation to become empty + gomega.Eventually(func() int { + return getSubnetAnnotationNetworkCount(cncName) + }).WithTimeout(5 * time.Second).Should(gomega.Equal(0)) + + // Should still have tunnel ID + gomega.Expect(hasTunnelIDAnnotation(cncName)).To(gomega.BeTrue()) + + return nil + } + err := app.Run([]string{app.Name}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + }) + + ginkgo.It("16. 
CNC continues processing healthy networks even when one NAD has parse error", func() { + app.Action = func(*cli.Context) error { + cncName := "test-cnc-error-aggregation" + testLabel := map[string]string{"error-test": "true"} + healthyNetwork := util.GenerateCUDNNetworkName("healthy") + + start() + + // Create a NAD with malformed config (will cause ParseNADInfo to fail) + malformedNAD := &nadv1.NetworkAttachmentDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: "malformed-nad", + Namespace: "malformed-ns", + Labels: testLabel, + Annotations: map[string]string{ + types.OvnNetworkNameAnnotation: "malformed-network", + types.OvnNetworkIDAnnotation: "1", + }, + OwnerReferences: []metav1.OwnerReference{makeCUDNOwnerRef("malformed-cudn")}, + }, + Spec: nadv1.NetworkAttachmentDefinitionSpec{ + // Invalid JSON config - missing required fields, will fail ParseNADInfo + Config: `{"cniVersion": "0.4.0", "name": "malformed", "type": "invalid-type"}`, + }, + } + _, err := fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("malformed-ns").Create( + context.Background(), malformedNAD, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Create a healthy NAD + healthyNAD := testCUDNNAD("healthy-nad", "healthy-ns", healthyNetwork, testLabel, "2") + _, err = fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("healthy-ns").Create( + context.Background(), healthyNAD, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Create CNC that matches both NADs + cnc := testCNC(cncName, []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: testLabel, + }, + }, + }, + }) + _, err = fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Create( + context.Background(), cnc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Wait for CNC to have tunnel ID and subnet annotation + // The healthy NAD should be processed even though the malformed one fails + gomega.Eventually(func() bool { + return hasTunnelIDAnnotation(cncName) && hasNonEmptySubnetAnnotation(cncName) + }).WithTimeout(5 * time.Second).Should(gomega.BeTrue()) + + // Verify subnet annotation has 1 network (the healthy one) + // The malformed NAD should have been skipped due to parse error + gomega.Expect(getSubnetAnnotationNetworkCount(cncName)).To(gomega.Equal(1)) + + return nil + } + err := app.Run([]string{app.Name}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + }) + }) +}) + +var _ = ginkgo.Describe("NetworkConnect ClusterManager Controller InitialSync Tests", func() { + var ( + app *cli.App + ) + + ginkgo.BeforeEach(func() { + err := config.PrepareTestConfig() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + config.IPv4Mode = true + config.IPv6Mode = false + config.OVNKubernetesFeature.EnableMultiNetwork = true + config.OVNKubernetesFeature.EnableNetworkConnect = true + config.OVNKubernetesFeature.EnableNetworkSegmentation = true + app = cli.NewApp() + app.Name = "test" + app.Flags = config.Flags + }) + + ginkgo.Context("Controller restart preserves allocator state", func() { + ginkgo.It("initialSync correctly restores tunnel IDs and subnet allocations after restart", func() { + app.Action = func(*cli.Context) error { + // ============================================================ + // PHASE 1: Set up 
controller with 2 CNCs selecting multiple networks + // ============================================================ + + // CNC1: 2 CUDNs + 2 PUDNs + cnc1Name := "test-cnc1" + cnc1CUDNLabel := map[string]string{"cnc1-cudn": "true"} + cudn1Network := util.GenerateCUDNNetworkName("cudn1") + cudn2Network := util.GenerateCUDNNetworkName("cudn2") + + // CNC2: 1 CUDN + 1 PUDN + cnc2Name := "test-cnc2" + cnc2CUDNLabel := map[string]string{"cnc2-cudn": "true"} + cudn3Network := util.GenerateCUDNNetworkName("cudn3") + + // Create clientset and watch factory + fakeClientset := util.GetOVNClientset().GetClusterManagerClientset() + ovntest.AddNetworkConnectApplyReactor(fakeClientset.NetworkConnectClient.(*networkconnectfake.Clientset)) + + wf, err := factory.NewClusterManagerWatchFactory(fakeClientset) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeNM := &networkmanager.FakeNetworkManager{ + PrimaryNetworks: make(map[string]util.NetInfo), + } + + tunnelKeysAllocator := id.NewTunnelKeyAllocator("TunnelKeys") + controller := NewController(wf, fakeClientset, fakeNM.Interface(), tunnelKeysAllocator) + + err = wf.Start() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + err = controller.Start() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // ============================================================ + // Create NADs for CNC1 (2 CUDNs) + // ============================================================ + nad1 := newTestCUDNNAD("cudn1-nad", "ns-cudn1", cudn1Network, cnc1CUDNLabel, "1") + _, err = fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("ns-cudn1").Create( + context.Background(), nad1, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + nad2 := newTestCUDNNAD("cudn2-nad", "ns-cudn2", cudn2Network, cnc1CUDNLabel, "2") + _, err = fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("ns-cudn2").Create( + context.Background(), nad2, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // ============================================================ + // Create P-UDN namespaces and NADs for CNC1 (2 P-UDNs) + // ============================================================ + ns1 := newTestNamespace("pudn1-ns", map[string]string{ + "cnc1-pudn": "true", + types.RequiredUDNNamespaceLabel: "", + }) + _, err = fakeClientset.KubeClient.CoreV1().Namespaces().Create( + context.Background(), ns1, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + pudn1Network := util.GenerateUDNNetworkName("pudn1-ns", "primary-udn") + nad3 := newTestUDNNAD("primary-udn", "pudn1-ns", pudn1Network, "3") + _, err = fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("pudn1-ns").Create( + context.Background(), nad3, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + netInfo1, err := util.ParseNADInfo(nad3) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + mutableNetInfo1 := util.NewMutableNetInfo(netInfo1) + mutableNetInfo1.AddNADs("pudn1-ns/primary-udn") + fakeNM.PrimaryNetworks["pudn1-ns"] = mutableNetInfo1 + + ns2 := newTestNamespace("pudn2-ns", map[string]string{ + "cnc1-pudn": "true", + types.RequiredUDNNamespaceLabel: "", + }) + _, err = fakeClientset.KubeClient.CoreV1().Namespaces().Create( + context.Background(), ns2, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + pudn2Network := util.GenerateUDNNetworkName("pudn2-ns", "primary-udn") + nad4 := newTestUDNNAD("primary-udn", 
"pudn2-ns", pudn2Network, "4") + _, err = fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("pudn2-ns").Create( + context.Background(), nad4, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + netInfo2, err := util.ParseNADInfo(nad4) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + mutableNetInfo2 := util.NewMutableNetInfo(netInfo2) + mutableNetInfo2.AddNADs("pudn2-ns/primary-udn") + fakeNM.PrimaryNetworks["pudn2-ns"] = mutableNetInfo2 + + // ============================================================ + // Create NADs for CNC2 (1 CUDN + 1 P-UDN) + // ============================================================ + nad5 := newTestCUDNNAD("cudn3-nad", "ns-cudn3", cudn3Network, cnc2CUDNLabel, "5") + _, err = fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("ns-cudn3").Create( + context.Background(), nad5, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ns3 := newTestNamespace("pudn3-ns", map[string]string{ + "cnc2-pudn": "true", + types.RequiredUDNNamespaceLabel: "", + }) + _, err = fakeClientset.KubeClient.CoreV1().Namespaces().Create( + context.Background(), ns3, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + pudn3Network := util.GenerateUDNNetworkName("pudn3-ns", "primary-udn") + nad6 := newTestUDNNAD("primary-udn", "pudn3-ns", pudn3Network, "6") + _, err = fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("pudn3-ns").Create( + context.Background(), nad6, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + netInfo3, err := util.ParseNADInfo(nad6) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + mutableNetInfo3 := util.NewMutableNetInfo(netInfo3) + mutableNetInfo3.AddNADs("pudn3-ns/primary-udn") + fakeNM.PrimaryNetworks["pudn3-ns"] = mutableNetInfo3 + + // ============================================================ + // Create CNCs + // ============================================================ + cnc1 := newTestCNC(cnc1Name, []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: cnc1CUDNLabel, + }, + }, + }, + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"cnc1-pudn": "true"}, + }, + }, + }, + }, nil) // uses default 192.168.0.0/16 /24 + _, err = fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Create( + context.Background(), cnc1, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + cnc2 := newTestCNC(cnc2Name, []apitypes.NetworkSelector{ + { + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: cnc2CUDNLabel, + }, + }, + }, + { + NetworkSelectionType: apitypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &apitypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"cnc2-pudn": "true"}, + }, + }, + }, + }, []networkconnectv1.ConnectSubnet{{CIDR: "10.100.0.0/16", NetworkPrefix: 24}}) + _, err = 
fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Create( + context.Background(), cnc2, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // ============================================================ + // Wait for annotations to be set + // ============================================================ + getCNCAnnotations := func(cncName string) (map[string]string, error) { + cnc, err := fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Get( + context.Background(), cncName, metav1.GetOptions{}) + if err != nil { + return nil, err + } + return cnc.Annotations, nil + } + + getSubnetAnnotationNetworkCount := func(cncName string) int { + annotations, err := getCNCAnnotations(cncName) + if err != nil { + return -1 + } + subnetAnnotation, exists := annotations[ovnNetworkConnectSubnetAnnotation] + if !exists { + return 0 + } + if subnetAnnotation == "{}" { + return 0 + } + var subnets map[string]util.NetworkConnectSubnetAnnotation + if err := json.Unmarshal([]byte(subnetAnnotation), &subnets); err != nil { + return -1 + } + return len(subnets) + } + + // Wait for CNC1 to have 4 networks (2 CUDNs + 2 PUDNs) + gomega.Eventually(func() int { + return getSubnetAnnotationNetworkCount(cnc1Name) + }).WithTimeout(10 * time.Second).Should(gomega.Equal(4)) + + // Wait for CNC2 to have 2 networks (1 CUDN + 1 PUDN) + gomega.Eventually(func() int { + return getSubnetAnnotationNetworkCount(cnc2Name) + }).WithTimeout(10 * time.Second).Should(gomega.Equal(2)) + + // ============================================================ + // PHASE 2: Capture state before restart + // ============================================================ + cnc1BeforeRestart, err := fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Get( + context.Background(), cnc1Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + cnc2BeforeRestart, err := fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Get( + context.Background(), cnc2Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Store original annotations + cnc1TunnelIDBefore := cnc1BeforeRestart.Annotations[util.OvnConnectRouterTunnelKeyAnnotation] + cnc1SubnetsBefore := cnc1BeforeRestart.Annotations[ovnNetworkConnectSubnetAnnotation] + cnc2TunnelIDBefore := cnc2BeforeRestart.Annotations[util.OvnConnectRouterTunnelKeyAnnotation] + cnc2SubnetsBefore := cnc2BeforeRestart.Annotations[ovnNetworkConnectSubnetAnnotation] + + gomega.Expect(cnc1TunnelIDBefore).NotTo(gomega.BeEmpty()) + gomega.Expect(cnc1SubnetsBefore).NotTo(gomega.BeEmpty()) + gomega.Expect(cnc2TunnelIDBefore).NotTo(gomega.BeEmpty()) + gomega.Expect(cnc2SubnetsBefore).NotTo(gomega.BeEmpty()) + + // ============================================================ + // PHASE 3: Stop the controller + // ============================================================ + controller.Stop() + wf.Shutdown() + + // ============================================================ + // PHASE 4: Restart with same objects (simulating restart) + // ============================================================ + // Create new watch factory and controller with same clientset (keeps objects) + wf2, err := factory.NewClusterManagerWatchFactory(fakeClientset) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Create new FakeNetworkManager with same primary networks config + fakeNM2 := &networkmanager.FakeNetworkManager{ + PrimaryNetworks: make(map[string]util.NetInfo), + } + // Re-setup primary 
networks (in real deployment this comes from network manager cache) + fakeNM2.PrimaryNetworks["pudn1-ns"] = mutableNetInfo1 + fakeNM2.PrimaryNetworks["pudn2-ns"] = mutableNetInfo2 + fakeNM2.PrimaryNetworks["pudn3-ns"] = mutableNetInfo3 + + tunnelKeysAllocator2 := id.NewTunnelKeyAllocator("TunnelKeys") + controller2 := NewController(wf2, fakeClientset, fakeNM2.Interface(), tunnelKeysAllocator2) + + err = wf2.Start() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + err = controller2.Start() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // ============================================================ + // PHASE 5: Verify state is correctly restored + // ============================================================ + + // Get CNCs after restart + cnc1AfterRestart, err := fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Get( + context.Background(), cnc1Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + cnc2AfterRestart, err := fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Get( + context.Background(), cnc2Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Verify tunnel IDs are unchanged + gomega.Expect(cnc1AfterRestart.Annotations[util.OvnConnectRouterTunnelKeyAnnotation]).To( + gomega.Equal(cnc1TunnelIDBefore), "CNC1 tunnel ID should be preserved after restart") + gomega.Expect(cnc2AfterRestart.Annotations[util.OvnConnectRouterTunnelKeyAnnotation]).To( + gomega.Equal(cnc2TunnelIDBefore), "CNC2 tunnel ID should be preserved after restart") + + // Verify subnet annotations are unchanged + gomega.Expect(cnc1AfterRestart.Annotations[ovnNetworkConnectSubnetAnnotation]).To( + gomega.Equal(cnc1SubnetsBefore), "CNC1 subnet allocations should be preserved after restart") + gomega.Expect(cnc2AfterRestart.Annotations[ovnNetworkConnectSubnetAnnotation]).To( + gomega.Equal(cnc2SubnetsBefore), "CNC2 subnet allocations should be preserved after restart") + + // Verify network counts are unchanged + gomega.Expect(getSubnetAnnotationNetworkCount(cnc1Name)).To(gomega.Equal(4)) + gomega.Expect(getSubnetAnnotationNetworkCount(cnc2Name)).To(gomega.Equal(2)) + + // ============================================================ + // PHASE 6: Verify allocator state by adding new networks + // ============================================================ + // Add a new CUDN to CNC1 and verify it gets a NEW subnet (not conflicting) + newCUDNNetwork := util.GenerateCUDNNetworkName("new-cudn") + newNAD := newTestCUDNNAD("new-cudn-nad", "ns-new-cudn", newCUDNNetwork, cnc1CUDNLabel, "7") + _, err = fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("ns-new-cudn").Create( + context.Background(), newNAD, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Wait for CNC1 to have 5 networks now (4 original + 1 new) + gomega.Eventually(func() int { + return getSubnetAnnotationNetworkCount(cnc1Name) + }).WithTimeout(10 * time.Second).Should(gomega.Equal(5)) + + // Get the updated CNC1 and verify new subnet doesn't conflict with existing ones + cnc1Final, err := fakeClientset.NetworkConnectClient.K8sV1().ClusterNetworkConnects().Get( + context.Background(), cnc1Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Parse the subnet annotations + var subnetsBefore map[string]util.NetworkConnectSubnetAnnotation + err = json.Unmarshal([]byte(cnc1SubnetsBefore), &subnetsBefore) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + var 
subnetsAfter map[string]util.NetworkConnectSubnetAnnotation + err = json.Unmarshal([]byte(cnc1Final.Annotations[ovnNetworkConnectSubnetAnnotation]), &subnetsAfter) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // All original subnets should still be present with same values + for owner, subnet := range subnetsBefore { + gomega.Expect(subnetsAfter).To(gomega.HaveKey(owner)) + gomega.Expect(subnetsAfter[owner]).To(gomega.Equal(subnet), + "Original subnet for %s should be unchanged", owner) + } + + // The new network should get the next sequential subnet after the existing ones + // CNC1 uses default 192.168.0.0/16 with /24 prefix, subnets are allocated sequentially: + // 192.168.0.0/24, 192.168.1.0/24, 192.168.2.0/24, 192.168.3.0/24, ... + // CNC1 had 4 networks before (subnets 0-3), so the 5th should get 192.168.4.0/24 + expectedNextSubnet := "192.168.4.0/24" + + // Find the new network's subnet + var newNetworkSubnet string + for owner, subnet := range subnetsAfter { + if _, existed := subnetsBefore[owner]; !existed { + // This is the new network + newNetworkSubnet = subnet.IPv4 + break + } + } + + gomega.Expect(newNetworkSubnet).To(gomega.Equal(expectedNextSubnet), + "New network should get the next sequential subnet %s, but got %s", + expectedNextSubnet, newNetworkSubnet) + + // Cleanup + controller2.Stop() + wf2.Shutdown() + + return nil + } + err := app.Run([]string{app.Name}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + }) + }) +}) diff --git a/go-controller/pkg/clustermanager/networkconnect/hybrid_connect_subnet_allocator.go b/go-controller/pkg/clustermanager/networkconnect/hybrid_connect_subnet_allocator.go new file mode 100644 index 0000000000..42ed8a2b78 --- /dev/null +++ b/go-controller/pkg/clustermanager/networkconnect/hybrid_connect_subnet_allocator.go @@ -0,0 +1,381 @@ +package networkconnect + +import ( + "errors" + "fmt" + "net" + "sync" + + "k8s.io/apimachinery/pkg/util/rand" + "k8s.io/klog/v2" + utilnet "k8s.io/utils/net" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/node" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + networkconnectv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/clusternetworkconnect/v1" + ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" +) + +var ( + p2pIPV4SubnetMask = 31 + p2pIPV6SubnetMask = 127 +) + +// randomizedLayer2BlockOwner returns the block owner name for new layer2 block allocations. +// Used during runtime when a new block is allocated for the first layer2 network. +// This is a random string to avoid conflicts. +func randomizedLayer2BlockOwner() string { + return fmt.Sprintf("l2-block-%s", rand.String(15)) +} + +// HybridConnectSubnetAllocator provides hybrid allocation for network connect subnets: +// - Layer3 networks: Each gets a full layer3NetworkPrefix block (e.g., /24) +// - Layer2 networks: Block allocation - multiple Layer2 networks share layer3Network‍Prefix blocks, +// with each Layer2 network getting a /31 (IPv4) or /127 (IPv6) from the shared block +// +// This allocator uses the node.SubnetAllocator to allocate subnets underneath using the layer3NetworkPrefix. +// +// We run one instance of this allocator per CNC. +type HybridConnectSubnetAllocator interface { + // AddNetworkRange initializes the allocator with the overall CIDR range and network prefix. + // Must be called during initialization before any concurrent Allocate/Release calls. 
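+	// Illustrative example (hypothetical values): calling AddNetworkRange with
+	// 192.168.0.0/16 and networkPrefix 24 carves the range into 256 /24 blocks;
+	// each Layer3 network later consumes one whole /24, while Layer2 networks
+	// share /24 blocks in /31 slices (or /127 slices for IPv6).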
+	AddNetworkRange(network *net.IPNet, networkPrefix int) error
+
+	// AllocateLayer3Subnet allocates network subnets for the layer3 network owner (could be both IPv4 and IPv6 in dual-stack)
+	AllocateLayer3Subnet(owner string) ([]*net.IPNet, error)
+
+	// AllocateLayer2Subnet allocates /31 (IPv4) and/or /127 (IPv6) for Layer2 networks from shared layer3 networkPrefix blocks
+	AllocateLayer2Subnet(owner string) ([]*net.IPNet, error)
+
+	// ReleaseLayer3Subnet releases all subnets for the layer3 network owner
+	ReleaseLayer3Subnet(owner string)
+
+	// ReleaseLayer2Subnet releases all subnets for the layer2 network owner
+	ReleaseLayer2Subnet(owner string)
+
+	// Layer2RangeCount returns the number of v4 and v6 ranges in the layer2 allocator (for testing)
+	Layer2RangeCount() (uint64, uint64)
+
+	// Layer3RangeCount returns the number of v4 and v6 ranges in the layer3 allocator (for testing)
+	Layer3RangeCount() (uint64, uint64)
+
+	// Layer3Usage returns the number of allocated v4 and v6 subnets in the layer3 allocator (for testing)
+	Layer3Usage() (uint64, uint64)
+
+	// MarkAllocatedSubnets restores previously allocated subnets from the annotation at startup.
+	// This should be called after AddNetworkRange but before any new allocations.
+	// It marks subnets as already allocated so they won't be handed out again.
+	MarkAllocatedSubnets(allocatedSubnets map[string][]*net.IPNet) error
+}
+
+// hybridConnectSubnetAllocator implements HybridConnectSubnetAllocator
+type hybridConnectSubnetAllocator struct {
+	// Layer3: standard subnet allocator (each network gets a full networkPrefix block)
+	layer3Allocator node.SubnetAllocator
+
+	// Layer2: a chunk allocated from layer3Allocator (one or more networkPrefix blocks are assigned to this allocator).
+	// It subdivides the chunk into /31s or /127s for each Layer2 network.
+	layer2Allocator node.SubnetAllocator
+
+	// networkPrefix per address family - used to mathematically derive the parent block from allocated subnets.
+	// NOTE: this logic assumes we support at most 2 CIDR ranges for this allocator - one IPv4 and one IPv6 in the CNC.
+	v4NetworkPrefix int
+	v6NetworkPrefix int
+
+	// protects the layer2BlockOwners cache used by AllocateLayer2Subnet and ReleaseLayer2Subnet
+	mu sync.RWMutex
+
+	// Layer2 block tracking for proper release.
+	// When a layer2 network is released, we also need to check whether the
+	// backing layer3 block should be released, i.e. whether that was the last
+	// network holding the block.
+	// Maps the layer2 block subnet CIDR (the string form of the v4,v6 CIDR block)
+	// to the layer2 block owner name, a random string of the form "l2-block-<random>"
+	// chosen to avoid conflicts.
+	// Example:
+	//   "10.100.0.0/28"            -> "l2-block-<random>" for single-stack IPv4
+	//   "fd00::/123"               -> "l2-block-<random>" for single-stack IPv6
+	//   "10.100.0.0/28,fd00::/123" -> "l2-block-<random>" for dual-stack
+	layer2BlockOwners map[string]string
+}
+
+// NewHybridConnectSubnetAllocator creates a new hybrid connect subnet allocator
+// and adds the network ranges for the connect subnets to the allocator.
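+// A minimal usage sketch, with illustrative values only (the CIDRs and owner
+// strings below are hypothetical, not taken from real CNC objects):
+//
+//	allocator, err := NewHybridConnectSubnetAllocator([]networkconnectv1.ConnectSubnet{
+//		{CIDR: "10.100.0.0/16", NetworkPrefix: 24},
+//	}, "example-cnc")
+//	if err != nil {
+//		return err
+//	}
+//	l3Subnets, _ := allocator.AllocateLayer3Subnet("example-layer3-owner") // one /24 per owner
+//	l2Subnets, _ := allocator.AllocateLayer2Subnet("example-layer2-owner") // a /31 carved from a shared /24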
+func NewHybridConnectSubnetAllocator(connectSubnets []networkconnectv1.ConnectSubnet, cncName string) (HybridConnectSubnetAllocator, error) { + allocator := &hybridConnectSubnetAllocator{ + layer3Allocator: node.NewSubnetAllocator(), + layer2Allocator: node.NewSubnetAllocator(), + layer2BlockOwners: make(map[string]string), + } + for _, connectSubnet := range connectSubnets { + _, netCIDR, err := net.ParseCIDR(string(connectSubnet.CIDR)) + if err != nil { + return nil, fmt.Errorf("failed to parse connect subnet CIDR %s: %w", connectSubnet.CIDR, err) + } + if utilnet.IsIPv4CIDR(netCIDR) && config.IPv4Mode { + if err := allocator.AddNetworkRange(netCIDR, int(connectSubnet.NetworkPrefix)); err != nil { + return nil, fmt.Errorf("failed to add IPV4 network range %s to cluster network connect %s subnet allocator: %w", netCIDR, cncName, err) + } + klog.V(5).Infof("Added IPV4 network range %s to cluster network connect %s subnet allocator", netCIDR, cncName) + } + if utilnet.IsIPv6CIDR(netCIDR) && config.IPv6Mode { + if err := allocator.AddNetworkRange(netCIDR, int(connectSubnet.NetworkPrefix)); err != nil { + return nil, fmt.Errorf("failed to add IPV6 network range %s to cluster network connect %s subnet allocator: %w", netCIDR, cncName, err) + } + klog.V(5).Infof("Added IPV6 network range %s to cluster network connect %s subnet allocator", netCIDR, cncName) + } + } + return allocator, nil +} + +// AddNetworkRange initializes the allocator with the base CIDR range and network prefix. +// This must be called during initialization before any concurrent Allocate/Release calls. +func (hca *hybridConnectSubnetAllocator) AddNetworkRange(network *net.IPNet, networkPrefix int) error { + // Validate network prefix + ones, bits := network.Mask.Size() + if networkPrefix <= ones { + return fmt.Errorf("networkPrefix %d must be larger than base CIDR prefix %d", networkPrefix, ones) + } + if networkPrefix >= bits { + return fmt.Errorf("networkPrefix %d must be smaller than address length %d", networkPrefix, bits) + } + + // Store the networkPrefix per address family + // It is not thread-safe and should only be called from a single goroutine during setup. + if utilnet.IsIPv6CIDR(network) { + hca.v6NetworkPrefix = networkPrefix + } else { + hca.v4NetworkPrefix = networkPrefix + } + + // Add this network range to the Layer3 allocator - each allocation gets a networkPrefix block + if err := hca.layer3Allocator.AddNetworkRange(network, networkPrefix); err != nil { + return fmt.Errorf("failed to add network range to Layer3 allocator: %v", err) + } + + return nil +} + +// AllocateLayer3Subnet allocates a full networkPrefix block for Layer3 networks. +// This will try to allocate from available ranges (both IPv4 and IPv6). +// Caller must call AddNetworkRange before calling this function. +func (hca *hybridConnectSubnetAllocator) AllocateLayer3Subnet(owner string) ([]*net.IPNet, error) { + subnets, err := hca.layer3Allocator.AllocateNetworks(owner) + if err != nil { + return nil, fmt.Errorf("Layer3 allocation failed for %s: %v", owner, err) + } + return subnets, nil +} + +// AllocateLayer2Subnet allocates /31 (IPv4) and/or /127 (IPv6) from shared Layer2 blocks. +// This will allocate from all available address families (both IPv4 and IPv6 if dual-stack). +// Caller must call AddNetworkRange before calling this function. 
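+// For example (illustrative, single-stack IPv4 with a 192.168.0.0/16 range and
+// networkPrefix 24): the first Layer2 owner triggers allocation of a fresh /24
+// block from the Layer3 allocator and receives 192.168.x.0/31; later owners get
+// 192.168.x.2/31, 192.168.x.4/31, and so on, until the block is exhausted and
+// expandLayer2Allocator carves out the next /24 block.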
+func (hca *hybridConnectSubnetAllocator) AllocateLayer2Subnet(owner string) ([]*net.IPNet, error) { + hca.mu.Lock() + defer hca.mu.Unlock() + + var err error + var subnets []*net.IPNet + + // Try to allocate from current Layer2 block + subnets, err = hca.layer2Allocator.AllocateNetworks(owner) + // Only return if we got subnets - empty slice means no ranges configured yet + if err == nil && len(subnets) > 0 { + return subnets, nil + } + if err != nil && !errors.Is(err, node.ErrSubnetAllocatorFull) { + return nil, fmt.Errorf("Layer2 allocation failed for %s: %v", owner, err) + } + + // Current layer2 allocator is empty (no ranges added yet - lazy initialization) or + // full (ErrSubnetAllocatorFull) - expand it with new blocks and then allocate + if err := hca.expandLayer2Allocator(); err != nil { + return nil, fmt.Errorf("failed to expand Layer2 allocator: %v", err) + } + + // Retry allocation after expanding - this will come from the new block + subnets, err = hca.layer2Allocator.AllocateNetworks(owner) + if err != nil { + return nil, fmt.Errorf("Layer2 allocation failed after expansion for %s: %v", owner, err) + } + + return subnets, nil +} + +// getParentBlockCIDR computes the parent block CIDR for a given subnet +// by masking the subnet IP to the networkPrefix boundary +func (hca *hybridConnectSubnetAllocator) getParentBlockCIDR(subnet *net.IPNet) *net.IPNet { + var networkPrefix int + var bits int + + if utilnet.IsIPv6CIDR(subnet) { + networkPrefix = hca.v6NetworkPrefix + bits = 128 + } else { + networkPrefix = hca.v4NetworkPrefix + bits = 32 + } + + mask := net.CIDRMask(networkPrefix, bits) + parentIP := subnet.IP.Mask(mask) + parentNet := &net.IPNet{IP: parentIP, Mask: mask} + return parentNet +} + +// getL2BlocksKey generates a consistent map key from layer2 block subnets. +// For single-stack: returns the CIDR string (e.g., "192.168.0.0/24") +// For dual-stack: returns "v4,v6" format (e.g., "192.168.0.0/24,fd00::/64") +// The key is used to track layer2 blocks in layer2BlockOwners. 
+func getL2BlocksKey(subnets []*net.IPNet) string { + // sort subnets to be v4, v6 to ensure consistent key + switch len(subnets) { + case 1: + return subnets[0].String() + case 2: + if subnets[0].IP.To4() != nil { + return subnets[0].String() + "," + subnets[1].String() + } else { + return subnets[1].String() + "," + subnets[0].String() + } + default: + return "" + } +} + +// expandLayer2Allocator expands the existing layer2 allocator by allocating new blocks from layer3 allocator +// It tries to allocate both IPv4 and IPv6 blocks from the Layer3 allocator +// If only one family is available, the block will be single-stack +func (hca *hybridConnectSubnetAllocator) expandLayer2Allocator() error { + blockOwnerName := randomizedLayer2BlockOwner() + + allocatedBlocks, err := hca.layer3Allocator.AllocateNetworks(blockOwnerName) + if err != nil { + return fmt.Errorf("failed to allocate layer2 blocks: %v", err) + } + + // Track the layer2 block owner name + hca.layer2BlockOwners[getL2BlocksKey(allocatedBlocks)] = blockOwnerName + + // Add each allocated block to the existing layer2 allocator (expanding it) + for _, block := range allocatedBlocks { + if utilnet.IsIPv6CIDR(block) && config.IPv6Mode { + if err := hca.layer2Allocator.AddNetworkRange(block, p2pIPV6SubnetMask); err != nil { + return fmt.Errorf("failed to add IPv6 range to layer2 allocator: %v", err) + } + } + if utilnet.IsIPv4CIDR(block) && config.IPv4Mode { + // IPv4 block + if err := hca.layer2Allocator.AddNetworkRange(block, p2pIPV4SubnetMask); err != nil { + return fmt.Errorf("failed to add IPv4 range to layer2 allocator: %v", err) + } + } + } + + return nil +} + +func (hca *hybridConnectSubnetAllocator) ReleaseLayer3Subnet(owner string) { + hca.layer3Allocator.ReleaseAllNetworks(owner) +} + +func (hca *hybridConnectSubnetAllocator) Layer2RangeCount() (uint64, uint64) { + return hca.layer2Allocator.RangeCount() +} + +func (hca *hybridConnectSubnetAllocator) Layer3RangeCount() (uint64, uint64) { + return hca.layer3Allocator.RangeCount() +} + +func (hca *hybridConnectSubnetAllocator) Layer3Usage() (uint64, uint64) { + return hca.layer3Allocator.Usage() +} + +func (hca *hybridConnectSubnetAllocator) ReleaseLayer2Subnet(owner string) { + hca.mu.Lock() + defer hca.mu.Unlock() + + hca.layer2Allocator.ReleaseAllNetworks(owner) + + // now check if any of the layer2 ranges are free now + freedRanges := hca.layer2Allocator.FreeUnusedRanges() + if len(freedRanges) > 0 { + if len(freedRanges) > 2 { + // Should never happen, since single owner never spans more than 2 blocks (v4 and v6) + klog.Errorf("Unexpectedly freed more than 2 ranges (%d) when releasing layer2 subnet for %s", len(freedRanges), owner) + return + } + // Remove free ranges from layer3 allocator + // find which parent blocks they came from + l2BlockKey := getL2BlocksKey(freedRanges) + if blockOwner := hca.layer2BlockOwners[l2BlockKey]; blockOwner != "" { + hca.layer3Allocator.ReleaseAllNetworks(blockOwner) + delete(hca.layer2BlockOwners, l2BlockKey) + } + } +} + +// MarkAllocatedSubnets restores previously allocated subnets from annotation at startup. +// This should be called after AddNetworkRange but before any new allocations. +// It marks subnets as already allocated so they won't be handed out again. 
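+// The map is keyed by network owner and holds the per-family subnets recorded in
+// the CNC subnet annotation, for example (illustrative values, assuming the owner
+// names parse to Layer3/Layer2 topologies via parseNetworkOwnerTopology):
+//
+//	allocatedSubnets := map[string][]*net.IPNet{
+//		"<layer3-owner>": {cidr("192.168.0.0/24")}, // marked directly in the layer3 allocator
+//		"<layer2-owner>": {cidr("192.168.1.0/31")}, // parent /24 is reserved first, then the /31 is marked
+//	}
+//
+// where cidr is shorthand for net.ParseCIDR. Owners whose topology cannot be
+// parsed are skipped.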
+func (hca *hybridConnectSubnetAllocator) MarkAllocatedSubnets(allocatedSubnets map[string][]*net.IPNet) error { + hca.mu.Lock() + defer hca.mu.Unlock() + + for owner, subnets := range allocatedSubnets { + topologyType, ok := parseNetworkOwnerTopology(owner) + if !ok { + continue + } + + switch topologyType { + case ovntypes.Layer3Topology: + // Simple: just mark in layer3 allocator + if err := hca.layer3Allocator.MarkAllocatedNetworks(owner, subnets...); err != nil { + return fmt.Errorf("failed to mark layer3 subnets for %s: %v", owner, err) + } + + case ovntypes.Layer2Topology: + // First ensure l2 block is already reserved in layer3 allocator + l2BlockSubnets := []*net.IPNet{} + // loop through the v4 and v6 allocated subnets for this network owner and get the parent block CIDR + for _, subnet := range subnets { + parentCIDR := hca.getParentBlockCIDR(subnet) + l2BlockSubnets = append(l2BlockSubnets, parentCIDR) + } + + l2BlockKey := getL2BlocksKey(l2BlockSubnets) + if _, exists := hca.layer2BlockOwners[l2BlockKey]; !exists { + // Set up block if not seen yet + blockOwnerName := randomizedLayer2BlockOwner() + for _, parentNet := range l2BlockSubnets { + + // Mark parent block in layer3 allocator (as a block) + err := hca.layer3Allocator.MarkAllocatedNetworks(blockOwnerName, parentNet) + if err != nil { + return fmt.Errorf("failed to mark block %s: %v", parentNet.String(), err) + } + + // Add range to layer2 allocator for /31 or /127 allocations + prefixLen := p2pIPV4SubnetMask + if utilnet.IsIPv6CIDR(parentNet) { + prefixLen = p2pIPV6SubnetMask + } + if err := hca.layer2Allocator.AddNetworkRange(parentNet, prefixLen); err != nil { + return fmt.Errorf("failed to add layer2 range %s: %v", parentNet.String(), err) + } + + } + hca.layer2BlockOwners[l2BlockKey] = blockOwnerName + } + // Now mark current l2 networks as allocated + for _, subnet := range subnets { + // Mark the /31 or /127 subnet in layer2 allocator + if err := hca.layer2Allocator.MarkAllocatedNetworks(owner, subnet); err != nil { + return fmt.Errorf("failed to mark layer2 subnet %s for %s: %v", subnet.String(), owner, err) + } + } + } + } + return nil +} diff --git a/go-controller/pkg/clustermanager/networkconnect/hybrid_connect_subnet_allocator_test.go b/go-controller/pkg/clustermanager/networkconnect/hybrid_connect_subnet_allocator_test.go new file mode 100644 index 0000000000..ce91fd5d64 --- /dev/null +++ b/go-controller/pkg/clustermanager/networkconnect/hybrid_connect_subnet_allocator_test.go @@ -0,0 +1,1680 @@ +package networkconnect + +import ( + "fmt" + "net" + "testing" + + "github.com/onsi/gomega" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + networkconnectv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/clusternetworkconnect/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" +) + +func mustParseCIDR(cidr string) *net.IPNet { + _, network, err := net.ParseCIDR(cidr) + if err != nil { + panic(err) + } + return network +} + +func TestHybridConnectSubnetAllocator_AddNetworkRange(t *testing.T) { + tests := []struct { + name string + network string + networkPrefix int + expectErr string + }{ + { + name: "valid IPv4 range", + network: "192.168.0.0/16", + networkPrefix: 24, + expectErr: "", + }, + { + name: "valid IPv6 range", + network: "fd00::/48", + networkPrefix: 64, + expectErr: "", + }, + { + name: "networkPrefix smaller than base CIDR prefix", + network: "192.168.0.0/24", + networkPrefix: 16, + expectErr: "networkPrefix 16 must be larger than base CIDR prefix 24", + }, + 
{ + name: "networkPrefix equal to base CIDR prefix", + network: "192.168.0.0/24", + networkPrefix: 24, + expectErr: "networkPrefix 24 must be larger than base CIDR prefix 24", + }, + { + name: "networkPrefix equal to address length", + network: "192.168.0.0/16", + networkPrefix: 32, + expectErr: "networkPrefix 32 must be smaller than address length 32", + }, + { + name: "networkPrefix larger than address length", + network: "192.168.0.0/16", + networkPrefix: 33, + expectErr: "networkPrefix 33 must be smaller than address length 32", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := gomega.NewWithT(t) + + config.IPv4Mode = true + config.IPv6Mode = true + + allocator, err := NewHybridConnectSubnetAllocator(nil, "test-cnc") + if err != nil { + t.Fatalf("failed to create subnet allocator: %v", err) + } + network := mustParseCIDR(tt.network) + err = allocator.AddNetworkRange(network, tt.networkPrefix) + + if tt.expectErr != "" { + g.Expect(err).To(gomega.MatchError(gomega.ContainSubstring(tt.expectErr))) + } else { + g.Expect(err).ToNot(gomega.HaveOccurred()) + } + }) + } +} + +func TestHybridConnectSubnetAllocator_AllocateLayer3Subnet(t *testing.T) { + tests := []struct { + name string + ipv4Mode bool + ipv6Mode bool + owners []string + expectSubnets map[string][]string + }{ + { + name: "single IPv4 allocation", + ipv4Mode: true, + ipv6Mode: false, + owners: []string{"layer3_1"}, + expectSubnets: map[string][]string{ + "layer3_1": {"192.168.0.0/24"}, + }, + }, + { + name: "multiple IPv4 allocations", + ipv4Mode: true, + ipv6Mode: false, + owners: []string{"layer3_1", "layer3_2", "layer3_3"}, + expectSubnets: map[string][]string{ + "layer3_1": {"192.168.0.0/24"}, + "layer3_2": {"192.168.1.0/24"}, + "layer3_3": {"192.168.2.0/24"}, + }, + }, + { + name: "single IPv6 allocation", + ipv4Mode: false, + ipv6Mode: true, + owners: []string{"layer3_1"}, + expectSubnets: map[string][]string{ + // With /112 CIDR and /120 prefix, blocks are /120 (256 addresses each) + "layer3_1": {"fd00::/120"}, + }, + }, + { + name: "multiple IPv6 allocations", + ipv4Mode: false, + ipv6Mode: true, + owners: []string{"layer3_1", "layer3_2", "layer3_3"}, + expectSubnets: map[string][]string{ + // /120 blocks: fd00::/120, fd00::100/120, fd00::200/120, etc. 
+ "layer3_1": {"fd00::/120"}, + "layer3_2": {"fd00::100/120"}, + "layer3_3": {"fd00::200/120"}, + }, + }, + { + name: "dual-stack allocation", + ipv4Mode: true, + ipv6Mode: true, + owners: []string{"layer3_1"}, + expectSubnets: map[string][]string{ + // IPv4 /24, IPv6 /120 (both have 8 host bits) + "layer3_1": {"192.168.0.0/24", "fd00::/120"}, + }, + }, + { + name: "multiple dual-stack allocations", + ipv4Mode: true, + ipv6Mode: true, + owners: []string{"layer3_1", "layer3_2", "layer3_3"}, + expectSubnets: map[string][]string{ + "layer3_1": {"192.168.0.0/24", "fd00::/120"}, + "layer3_2": {"192.168.1.0/24", "fd00::100/120"}, + "layer3_3": {"192.168.2.0/24", "fd00::200/120"}, + }, + }, + { + name: "same owner gets same subnet on repeated allocation", + ipv4Mode: true, + ipv6Mode: false, + owners: []string{"layer3_1", "layer3_1"}, // same owner twice + expectSubnets: map[string][]string{ + "layer3_1": {"192.168.0.0/24"}, + }, + }, + { + name: "different owners get different subnets", + ipv4Mode: true, + ipv6Mode: false, + owners: []string{"layer3_1", "layer3_2"}, + expectSubnets: map[string][]string{ + "layer3_1": {"192.168.0.0/24"}, + "layer3_2": {"192.168.1.0/24"}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := gomega.NewWithT(t) + + config.IPv4Mode = tt.ipv4Mode + config.IPv6Mode = tt.ipv6Mode + + connectSubnets := []networkconnectv1.ConnectSubnet{ + { + CIDR: "192.168.0.0/16", + NetworkPrefix: 24, + }, + { + CIDR: "fd00::/112", + NetworkPrefix: 120, // 32-24=8, so 128-8=120 + }, + } + allocator, err := NewHybridConnectSubnetAllocator(connectSubnets, "test-cnc") + if err != nil { + t.Fatalf("failed to create subnet allocator: %v", err) + } + + // Allocate subnets + for _, owner := range tt.owners { + subnets, err := allocator.AllocateLayer3Subnet(owner) + g.Expect(err).ToNot(gomega.HaveOccurred()) + + if expected, ok := tt.expectSubnets[owner]; ok { + g.Expect(subnets).To(gomega.HaveLen(len(expected))) + for i, subnet := range subnets { + g.Expect(subnet.String()).To(gomega.Equal(expected[i])) + } + } + } + }) + } +} + +func TestHybridConnectSubnetAllocator_AllocateLayer2Subnet(t *testing.T) { + tests := []struct { + name string + ipv4Mode bool + ipv6Mode bool + owners []string + expectSubnets map[string][]string + }{ + { + name: "single IPv4 layer2 allocation gets /31", + ipv4Mode: true, + ipv6Mode: false, + owners: []string{"layer2_1"}, + expectSubnets: map[string][]string{ + "layer2_1": {"192.168.0.0/31"}, + }, + }, + { + name: "multiple IPv4 layer2 allocations get /31 each", + ipv4Mode: true, + ipv6Mode: false, + owners: []string{"layer2_1", "layer2_2", "layer2_3"}, + expectSubnets: map[string][]string{ + "layer2_1": {"192.168.0.0/31"}, + "layer2_2": {"192.168.0.2/31"}, + "layer2_3": {"192.168.0.4/31"}, + }, + }, + { + name: "single IPv6 layer2 allocation gets /127", + ipv4Mode: false, + ipv6Mode: true, + owners: []string{"layer2_1"}, + expectSubnets: map[string][]string{ + // Layer2 block gets /120 block from layer3 (fd00::/120), then allocates /127 from it + // /127 has subnetBits = 127 - 120 = 7, which is < 16, so no address skipping + "layer2_1": {"fd00::/127"}, + }, + }, + { + name: "multiple IPv6 layer2 allocations get /127 each", + ipv4Mode: false, + ipv6Mode: true, + owners: []string{"layer2_1", "layer2_2", "layer2_3"}, + expectSubnets: map[string][]string{ + "layer2_1": {"fd00::/127"}, + "layer2_2": {"fd00::2/127"}, + "layer2_3": {"fd00::4/127"}, + }, + }, + { + name: "dual-stack layer2 allocation", + ipv4Mode: true, + ipv6Mode: true, 
+ owners: []string{"layer2_1"}, + expectSubnets: map[string][]string{ + // Layer2 block gets /24 block from layer3 (192.168.0.0/24), then allocates /31 from it + // IPv6 block gets /120 block from layer3 (fd00::/120), then allocates /127 from it + "layer2_1": {"192.168.0.0/31", "fd00::/127"}, + }, + }, + { + name: "multiple dual-stack layer2 allocations", + ipv4Mode: true, + ipv6Mode: true, + owners: []string{"layer2_1", "layer2_2", "layer2_3"}, + expectSubnets: map[string][]string{ + "layer2_1": {"192.168.0.0/31", "fd00::/127"}, + "layer2_2": {"192.168.0.2/31", "fd00::2/127"}, + "layer2_3": {"192.168.0.4/31", "fd00::4/127"}, + }, + }, + { + name: "same owner gets same subnet on repeated allocation", + ipv4Mode: true, + ipv6Mode: false, + owners: []string{"layer2_1", "layer2_1"}, // same owner twice + expectSubnets: map[string][]string{ + "layer2_1": {"192.168.0.0/31"}, + }, + }, + { + name: "different owners get different subnets", + ipv4Mode: true, + ipv6Mode: false, + owners: []string{"layer2_1", "layer2_2"}, + expectSubnets: map[string][]string{ + "layer2_1": {"192.168.0.0/31"}, + "layer2_2": {"192.168.0.2/31"}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := gomega.NewWithT(t) + + config.IPv4Mode = tt.ipv4Mode + config.IPv6Mode = tt.ipv6Mode + + connectSubnets := []networkconnectv1.ConnectSubnet{ + { + CIDR: "192.168.0.0/16", + NetworkPrefix: 24, + }, + { + CIDR: "fd00::/112", + NetworkPrefix: 120, // 32-24=8, so 128-8=120 + }, + } + allocator, err := NewHybridConnectSubnetAllocator(connectSubnets, "test-cnc") + if err != nil { + t.Fatalf("failed to create subnet allocator: %v", err) + } + + // Allocate layer2 subnets + for _, owner := range tt.owners { + subnets, err := allocator.AllocateLayer2Subnet(owner) + g.Expect(err).ToNot(gomega.HaveOccurred()) + + if expected, ok := tt.expectSubnets[owner]; ok { + g.Expect(subnets).To(gomega.HaveLen(len(expected))) + for i, subnet := range subnets { + g.Expect(subnet.String()).To(gomega.Equal(expected[i])) + } + } + } + }) + } +} + +func TestHybridConnectSubnetAllocator_AllocateMixedLayer3AndLayer2Subnets(t *testing.T) { + g := gomega.NewWithT(t) + + config.IPv4Mode = true + config.IPv6Mode = false + + connectSubnets := []networkconnectv1.ConnectSubnet{ + { + CIDR: "192.168.0.0/16", + NetworkPrefix: 24, + }, + } + allocator, err := NewHybridConnectSubnetAllocator(connectSubnets, "test-cnc") + if err != nil { + t.Fatalf("failed to create subnet allocator: %v", err) + } + + // Allocate some Layer3 subnets first + layer3Subnets := make(map[string][]*net.IPNet) + for i := 1; i <= 3; i++ { + owner := "layer3_" + string(rune('0'+i)) + subnets, err := allocator.AllocateLayer3Subnet(owner) + g.Expect(err).ToNot(gomega.HaveOccurred()) + layer3Subnets[owner] = subnets + } + + // Now allocate Layer2 subnets - they should come from a different /24 block + layer2Subnets := make(map[string][]*net.IPNet) + for i := 1; i <= 5; i++ { + owner := "layer2_" + string(rune('0'+i)) + subnets, err := allocator.AllocateLayer2Subnet(owner) + g.Expect(err).ToNot(gomega.HaveOccurred()) + layer2Subnets[owner] = subnets + + // Verify Layer2 subnets are /31 + ones, _ := subnets[0].Mask.Size() + g.Expect(ones).To(gomega.Equal(31)) + } + + // Verify no overlap between Layer3 and Layer2 subnets. + // Layer2 subnets are /31s within a /24 block allocated from layer3 for the layer2-block. + // So we verify they're in different /24 blocks than the Layer3 subnets. 
+ for _, l3Subnets := range layer3Subnets { + for _, l3Subnet := range l3Subnets { + for _, l2Subnets := range layer2Subnets { + for _, l2Subnet := range l2Subnets { + l3Network := l3Subnet.IP.Mask(net.CIDRMask(24, 32)) + l2Network := l2Subnet.IP.Mask(net.CIDRMask(24, 32)) + g.Expect(l3Network.String()).ToNot(gomega.Equal(l2Network.String()), + "Layer3 subnet %s and Layer2 subnet %s should not be in the same /24 block", + l3Subnet.String(), l2Subnet.String()) + } + } + } + } +} + +func TestHybridConnectSubnetAllocator_ReleaseLayer3Subnets(t *testing.T) { + tests := []struct { + name string + ipv4Mode bool + ipv6Mode bool + ipv4Network string + ipv4NetworkPrefix int + ipv6Network string + ipv6NetworkPrefix int + allocateFirst []string + release []string + allocateAgain []string + expectSubnets map[string][]string // owner -> expected subnets after re-allocation of released subnets + }{ + { + name: "IPv4 allocation continues from where it left off after release", + ipv4Mode: true, + ipv6Mode: false, + ipv4Network: "192.168.0.0/16", + ipv4NetworkPrefix: 24, + allocateFirst: []string{"layer3_1", "layer3_2"}, + release: []string{"layer3_1"}, + allocateAgain: []string{"layer3_3"}, + expectSubnets: map[string][]string{ + // Allocator continues from next position, doesn't immediately reuse released subnet + "layer3_3": {"192.168.2.0/24"}, + }, + }, + { + name: "IPv4 released subnet is reused when allocator wraps around", + ipv4Mode: true, + ipv6Mode: false, + ipv4Network: "192.168.0.0/24", // Small range: only 4 /26 subnets + ipv4NetworkPrefix: 26, + allocateFirst: []string{"layer3_1", "layer3_2", "layer3_3", "layer3_4"}, + release: []string{"layer3_2"}, // Release the second one + allocateAgain: []string{"layer3_5"}, + expectSubnets: map[string][]string{ + // Allocator wraps around and reuses the released subnet + "layer3_5": {"192.168.0.64/26"}, + }, + }, + { + name: "IPv6 allocation continues from where it left off after release", + ipv4Mode: false, + ipv6Mode: true, + ipv6Network: "fd00::/112", + ipv6NetworkPrefix: 120, // matches ipv4 /24: 32-24=8, 128-8=120 + allocateFirst: []string{"layer3_1", "layer3_2"}, + release: []string{"layer3_1"}, + allocateAgain: []string{"layer3_3"}, + expectSubnets: map[string][]string{ + // Allocator continues from next position: layer3_1=fd00::/120, layer3_2=fd00::100/120 + // layer3_3 gets fd00::200/120 + "layer3_3": {"fd00::200/120"}, + }, + }, + { + name: "IPv6 released subnet is reused when allocator wraps around", + ipv4Mode: false, + ipv6Mode: true, + ipv6Network: "fd00::/120", // Small range: only 4 /122 subnets + ipv6NetworkPrefix: 122, // matches ipv4 /26: 32-26=6, 128-6=122 + allocateFirst: []string{"layer3_1", "layer3_2", "layer3_3", "layer3_4"}, // Allocate all 4 + release: []string{"layer3_2"}, // Release the second one + allocateAgain: []string{"layer3_5"}, + expectSubnets: map[string][]string{ + // Allocator wraps around and reuses the released subnet (fd00::40/122) + "layer3_5": {"fd00::40/122"}, + }, + }, + { + name: "dual-stack allocation continues from where it left off after release", + ipv4Mode: true, + ipv6Mode: true, + ipv4Network: "192.168.0.0/16", + ipv4NetworkPrefix: 24, + ipv6Network: "fd00::/112", + ipv6NetworkPrefix: 120, // matches ipv4 /24: 32-24=8, 128-8=120 + allocateFirst: []string{"layer3_1", "layer3_2"}, + release: []string{"layer3_1"}, + allocateAgain: []string{"layer3_3"}, + expectSubnets: map[string][]string{ + // Both IPv4 and IPv6 continue from next position + "layer3_3": {"192.168.2.0/24", "fd00::200/120"}, + }, + }, + 
{ + name: "dual-stack released subnet is reused when allocator wraps around", + ipv4Mode: true, + ipv6Mode: true, + ipv4Network: "192.168.0.0/24", // Small range: only 4 /26 subnets + ipv4NetworkPrefix: 26, + ipv6Network: "fd00::/120", // Small range: only 4 /122 subnets + ipv6NetworkPrefix: 122, // matches ipv4 /26: 32-26=6, 128-6=122 + allocateFirst: []string{"layer3_1", "layer3_2", "layer3_3", "layer3_4"}, // Allocate all 4 + release: []string{"layer3_2"}, // Release the second one + allocateAgain: []string{"layer3_5"}, + expectSubnets: map[string][]string{ + // Both IPv4 and IPv6 wrap around and reuse the released subnet + // IPv4: 192.168.0.64/26 was layer3_2; IPv6: fd00::40/122 was layer3_2 + "layer3_5": {"192.168.0.64/26", "fd00::40/122"}, + }, + }, + { + name: "releasing non-existent owner is safe", + ipv4Mode: true, + ipv6Mode: false, + ipv4Network: "192.168.0.0/16", + ipv4NetworkPrefix: 24, + allocateFirst: []string{"layer3_1"}, + release: []string{"layer3_nonexistent"}, + allocateAgain: []string{"layer3_2"}, + expectSubnets: map[string][]string{ + "layer3_2": {"192.168.1.0/24"}, // gets next available (nothing was actually released) + }, + }, + { + name: "same owner gets same subnet without re-allocating", + ipv4Mode: true, + ipv6Mode: false, + ipv4Network: "192.168.0.0/16", + ipv4NetworkPrefix: 24, + allocateFirst: []string{"layer3_1", "layer3_2"}, + release: []string{}, // no release + allocateAgain: []string{"layer3_1"}, + expectSubnets: map[string][]string{ + // Same owner gets same subnet back + "layer3_1": {"192.168.0.0/24"}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := gomega.NewWithT(t) + + config.IPv4Mode = tt.ipv4Mode + config.IPv6Mode = tt.ipv6Mode + + connectSubnets := []networkconnectv1.ConnectSubnet{} + if tt.ipv4Mode && tt.ipv4Network != "" { + connectSubnets = append(connectSubnets, networkconnectv1.ConnectSubnet{ + CIDR: networkconnectv1.CIDR(tt.ipv4Network), + NetworkPrefix: int32(tt.ipv4NetworkPrefix), + }) + } + if tt.ipv6Mode && tt.ipv6Network != "" { + connectSubnets = append(connectSubnets, networkconnectv1.ConnectSubnet{ + CIDR: networkconnectv1.CIDR(tt.ipv6Network), + NetworkPrefix: int32(tt.ipv6NetworkPrefix), + }) + } + allocator, err := NewHybridConnectSubnetAllocator(connectSubnets, "test-cnc") + if err != nil { + t.Fatalf("failed to create subnet allocator: %v", err) + } + + // First allocation + for _, owner := range tt.allocateFirst { + _, err := allocator.AllocateLayer3Subnet(owner) + g.Expect(err).ToNot(gomega.HaveOccurred()) + } + + // Release + for _, owner := range tt.release { + allocator.ReleaseLayer3Subnet(owner) + } + + // Allocate again and verify + for _, owner := range tt.allocateAgain { + subnets, err := allocator.AllocateLayer3Subnet(owner) + g.Expect(err).ToNot(gomega.HaveOccurred()) + + if expected, ok := tt.expectSubnets[owner]; ok { + g.Expect(subnets).To(gomega.HaveLen(len(expected))) + for i, subnet := range subnets { + g.Expect(subnet.String()).To(gomega.Equal(expected[i])) + } + } + } + }) + } +} + +func TestHybridConnectSubnetAllocator_ReleaseLayer2Subnets(t *testing.T) { + tests := []struct { + name string + ipv4Mode bool + ipv6Mode bool + ipv4Network string + ipv4NetworkPrefix int + ipv6Network string + ipv6NetworkPrefix int + allocateFirst []string + release []string + allocateAgain []string + expectSubnets map[string][]string // owner -> expected subnets after re-allocation + }{ + { + name: "IPv4 allocation continues from where it left off after release", + ipv4Mode: true, + 
ipv6Mode: false, + ipv4Network: "192.168.0.0/16", + ipv4NetworkPrefix: 24, + allocateFirst: []string{"layer2_1", "layer2_2"}, + release: []string{"layer2_1"}, + allocateAgain: []string{"layer2_3"}, + expectSubnets: map[string][]string{ + // Allocator continues from next position, doesn't immediately reuse released subnet + "layer2_3": {"192.168.0.4/31"}, + }, + }, + { + name: "IPv4 released subnet is reused before expanding to new layer3 block", + ipv4Mode: true, + ipv6Mode: false, + ipv4Network: "192.168.0.0/27", // Two /28 blocks available at layer3 level + ipv4NetworkPrefix: 28, + // Fill the first /28 block (8 /31s), then release one + allocateFirst: []string{"layer2_1", "layer2_2", "layer2_3", "layer2_4", "layer2_5", "layer2_6", "layer2_7", "layer2_8"}, + release: []string{"layer2_5"}, // Release 192.168.0.8/31 + allocateAgain: []string{"layer2_9"}, + expectSubnets: map[string][]string{ + // Should reuse the released /31 from first block, not expand to second /28 block + "layer2_9": {"192.168.0.8/31"}, + }, + }, + { + name: "IPv6 allocation continues from where it left off after release", + ipv4Mode: false, + ipv6Mode: true, + ipv6Network: "fd00::/48", + ipv6NetworkPrefix: 64, + allocateFirst: []string{"layer2_1", "layer2_2"}, + release: []string{"layer2_1"}, + allocateAgain: []string{"layer2_3"}, + expectSubnets: map[string][]string{ + // Allocator continues from next position (layer2 block gets /64, then allocates /127s) + "layer2_3": {"fd00:0:0:1::6/127"}, + }, + }, + { + name: "IPv6 released subnet is reused before expanding to new layer3 block", + ipv4Mode: false, + ipv6Mode: true, + ipv6Network: "fd00::/123", // Two /124 blocks available at layer3 level + ipv6NetworkPrefix: 124, // Each /124 = 16 IPs = 8 /127s + // Fill the first /124 block (8 /127s), then release one + allocateFirst: []string{"layer2_1", "layer2_2", "layer2_3", "layer2_4", "layer2_5", "layer2_6", "layer2_7", "layer2_8"}, + release: []string{"layer2_5"}, // Release the fifth one (fd00::8/127) + allocateAgain: []string{"layer2_9"}, + expectSubnets: map[string][]string{ + // Should reuse the released /127 from first block, not expand to second /124 block + "layer2_9": {"fd00::8/127"}, + }, + }, + { + name: "dual-stack allocation continues from where it left off after release", + ipv4Mode: true, + ipv6Mode: true, + ipv4Network: "192.168.0.0/16", + ipv4NetworkPrefix: 24, + ipv6Network: "fd00::/112", + ipv6NetworkPrefix: 120, // matches ipv4 /24: 32-24=8, 128-8=120 + allocateFirst: []string{"layer2_1", "layer2_2"}, + release: []string{"layer2_1"}, + allocateAgain: []string{"layer2_3"}, + expectSubnets: map[string][]string{ + // Both IPv4 and IPv6 continue from next position (index 2) + "layer2_3": {"192.168.0.4/31", "fd00::4/127"}, + }, + }, + { + name: "dual-stack released subnet is reused before expanding to new layer3 block", + ipv4Mode: true, + ipv6Mode: true, + ipv4Network: "192.168.0.0/27", // Two /28 blocks at layer3, each /28 = 8 /31s for layer2 + ipv4NetworkPrefix: 28, + ipv6Network: "fd00::/123", // Two /124 blocks at layer3, each /124 = 8 /127s for layer2 + ipv6NetworkPrefix: 124, + // Fill the first blocks (8 /31s for IPv4, 8 /127s for IPv6), then release one + allocateFirst: []string{"layer2_1", "layer2_2", "layer2_3", "layer2_4", "layer2_5", "layer2_6", "layer2_7", "layer2_8"}, + release: []string{"layer2_5"}, // Release the fifth one + allocateAgain: []string{"layer2_9"}, + expectSubnets: map[string][]string{ + // Should reuse the released subnets from first blocks, not expand to second blocks + 
"layer2_9": {"192.168.0.8/31", "fd00::8/127"}, + }, + }, + { + name: "releasing non-existent owner is safe", + ipv4Mode: true, + ipv6Mode: false, + ipv4Network: "192.168.0.0/16", + ipv4NetworkPrefix: 24, + allocateFirst: []string{"layer2_1"}, + release: []string{"layer2_nonexistent"}, + allocateAgain: []string{"layer2_2"}, + expectSubnets: map[string][]string{ + "layer2_2": {"192.168.0.2/31"}, // gets next available (nothing was actually released) + }, + }, + { + name: "same owner gets same subnet without re-allocating", + ipv4Mode: true, + ipv6Mode: false, + ipv4Network: "192.168.0.0/16", + ipv4NetworkPrefix: 24, + allocateFirst: []string{"layer2_1", "layer2_2"}, + release: []string{}, // no release + allocateAgain: []string{"layer2_1"}, + expectSubnets: map[string][]string{ + // Same owner gets same subnet back + "layer2_1": {"192.168.0.0/31"}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := gomega.NewWithT(t) + + config.IPv4Mode = tt.ipv4Mode + config.IPv6Mode = tt.ipv6Mode + + connectSubnets := []networkconnectv1.ConnectSubnet{} + if tt.ipv4Mode && tt.ipv4Network != "" { + connectSubnets = append(connectSubnets, networkconnectv1.ConnectSubnet{ + CIDR: networkconnectv1.CIDR(tt.ipv4Network), + NetworkPrefix: int32(tt.ipv4NetworkPrefix), + }) + } + if tt.ipv6Mode && tt.ipv6Network != "" { + connectSubnets = append(connectSubnets, networkconnectv1.ConnectSubnet{ + CIDR: networkconnectv1.CIDR(tt.ipv6Network), + NetworkPrefix: int32(tt.ipv6NetworkPrefix), + }) + } + allocator, err := NewHybridConnectSubnetAllocator(connectSubnets, "test-cnc") + g.Expect(err).ToNot(gomega.HaveOccurred()) + + // First allocation + for _, owner := range tt.allocateFirst { + _, err := allocator.AllocateLayer2Subnet(owner) + g.Expect(err).ToNot(gomega.HaveOccurred()) + } + + // Release + for _, owner := range tt.release { + allocator.ReleaseLayer2Subnet(owner) + } + + // Allocate again and verify + for _, owner := range tt.allocateAgain { + subnets, err := allocator.AllocateLayer2Subnet(owner) + g.Expect(err).ToNot(gomega.HaveOccurred()) + + if expected, ok := tt.expectSubnets[owner]; ok { + g.Expect(subnets).To(gomega.HaveLen(len(expected))) + for i, subnet := range subnets { + g.Expect(subnet.String()).To(gomega.Equal(expected[i])) + } + } + } + }) + } +} + +func TestHybridConnectSubnetAllocator_ReleaseMixedLayer3AndLayer2Subnets(t *testing.T) { + g := gomega.NewWithT(t) + + config.IPv4Mode = true + config.IPv6Mode = true + + connectSubnets := []networkconnectv1.ConnectSubnet{ + { + CIDR: networkconnectv1.CIDR("192.168.0.0/16"), + NetworkPrefix: 24, + }, + { + CIDR: networkconnectv1.CIDR("fd00::/112"), + NetworkPrefix: 120, // matches ipv4 /24: 32-24=8, 128-8=120 + }, + } + allocator, err := NewHybridConnectSubnetAllocator(connectSubnets, "test-cnc") + g.Expect(err).ToNot(gomega.HaveOccurred()) + + // Allocate layer3 subnets (dual-stack: both IPv4 and IPv6) + // With /120 prefix, IPv6 blocks are fd00::/120, fd00::100/120, etc. 
+ l3Sub1, err := allocator.AllocateLayer3Subnet("layer3_1") + g.Expect(err).ToNot(gomega.HaveOccurred()) + g.Expect(l3Sub1).To(gomega.HaveLen(2)) + g.Expect(l3Sub1[0].String()).To(gomega.Equal("192.168.0.0/24")) + g.Expect(l3Sub1[1].String()).To(gomega.Equal("fd00::/120")) + + l3Sub2, err := allocator.AllocateLayer3Subnet("layer3_2") + g.Expect(err).ToNot(gomega.HaveOccurred()) + g.Expect(l3Sub2).To(gomega.HaveLen(2)) + g.Expect(l3Sub2[0].String()).To(gomega.Equal("192.168.1.0/24")) + g.Expect(l3Sub2[1].String()).To(gomega.Equal("fd00::100/120")) + + // Allocate layer2 subnets (will get new blocks from layer3: 192.168.2.0/24 and fd00::200/120) + l2Sub1, err := allocator.AllocateLayer2Subnet("layer2_1") + g.Expect(err).ToNot(gomega.HaveOccurred()) + g.Expect(l2Sub1).To(gomega.HaveLen(2)) + g.Expect(l2Sub1[0].String()).To(gomega.Equal("192.168.2.0/31")) + g.Expect(l2Sub1[1].String()).To(gomega.Equal("fd00::200/127")) + + l2Sub2, err := allocator.AllocateLayer2Subnet("layer2_2") + g.Expect(err).ToNot(gomega.HaveOccurred()) + g.Expect(l2Sub2).To(gomega.HaveLen(2)) + g.Expect(l2Sub2[0].String()).To(gomega.Equal("192.168.2.2/31")) + g.Expect(l2Sub2[1].String()).To(gomega.Equal("fd00::202/127")) + + // Release one layer3 and one layer2 + allocator.ReleaseLayer3Subnet("layer3_1") + allocator.ReleaseLayer2Subnet("layer2_1") + + // Allocate new layer3 - allocator continues from where it left off + // IPv4: 192.168.2.0/24 is taken by layer2-block, so next available is 192.168.3.0/24 + // IPv6: fd00::200/120 is taken by layer2-block, so next available is fd00::300/120 + l3Sub3, err := allocator.AllocateLayer3Subnet("layer3_3") + g.Expect(err).ToNot(gomega.HaveOccurred()) + g.Expect(l3Sub3).To(gomega.HaveLen(2)) + g.Expect(l3Sub3[0].String()).To(gomega.Equal("192.168.3.0/24")) + g.Expect(l3Sub3[1].String()).To(gomega.Equal("fd00::300/120")) + + // Allocate new layer2 - allocator continues from where it left off + l2Sub3, err := allocator.AllocateLayer2Subnet("layer2_3") + g.Expect(err).ToNot(gomega.HaveOccurred()) + g.Expect(l2Sub3).To(gomega.HaveLen(2)) + g.Expect(l2Sub3[0].String()).To(gomega.Equal("192.168.2.4/31")) + g.Expect(l2Sub3[1].String()).To(gomega.Equal("fd00::204/127")) + + // Verify layer3 and layer2 allocators are independent + // Allocate more layer2 - should continue in layer2 block + l2Sub4, err := allocator.AllocateLayer2Subnet("layer2_4") + g.Expect(err).ToNot(gomega.HaveOccurred()) + g.Expect(l2Sub4).To(gomega.HaveLen(2)) + g.Expect(l2Sub4[0].String()).To(gomega.Equal("192.168.2.6/31")) + g.Expect(l2Sub4[1].String()).To(gomega.Equal("fd00::206/127")) + + // Allocate layer3 - should continue in layer3 space + l3Sub4, err := allocator.AllocateLayer3Subnet("layer3_4") + g.Expect(err).ToNot(gomega.HaveOccurred()) + g.Expect(l3Sub4).To(gomega.HaveLen(2)) + g.Expect(l3Sub4[0].String()).To(gomega.Equal("192.168.4.0/24")) + g.Expect(l3Sub4[1].String()).To(gomega.Equal("fd00::400/120")) + + // Verify released owner gets new subnet (not the old one) + l3Sub1Again, err := allocator.AllocateLayer3Subnet("layer3_1") + g.Expect(err).ToNot(gomega.HaveOccurred()) + g.Expect(l3Sub1Again).To(gomega.HaveLen(2)) + // layer3_1 was released, so new allocation continues from next position + g.Expect(l3Sub1Again[0].String()).To(gomega.Equal("192.168.5.0/24")) + g.Expect(l3Sub1Again[1].String()).To(gomega.Equal("fd00::500/120")) +} + +func TestHybridConnectSubnetAllocator_Layer2BlockExpansionFromLayer3(t *testing.T) { + g := gomega.NewWithT(t) + + config.IPv4Mode = true + config.IPv6Mode = true + + // Use 
small CIDRs to test block exhaustion and expansion + // IPv4: 192.168.0.0/24 with /28 prefix gives us 16 /28 blocks, each holding 8 /31 subnets + // IPv6: fd00::/120 with /124 prefix gives us 16 /124 blocks, each holding 8 /127 subnets + connectSubnets := []networkconnectv1.ConnectSubnet{ + { + CIDR: networkconnectv1.CIDR("192.168.0.0/24"), + NetworkPrefix: 28, + }, + { + CIDR: networkconnectv1.CIDR("fd00::/120"), + NetworkPrefix: 124, + }, + } + allocator, err := NewHybridConnectSubnetAllocator(connectSubnets, "test-cnc") + g.Expect(err).ToNot(gomega.HaveOccurred()) + + // Allocate more than 8 subnets to trigger expansion of new blocks from layer3 to layer2 + // First 8 allocations will use the first /28 (IPv4) and /124 (IPv6) blocks + // 9th allocation should trigger expansion - allocating new blocks from layer3 + layer2Subnets := make(map[string][]*net.IPNet) + for i := 1; i <= 20; i++ { // Allocate 20 to span multiple blocks + owner := fmt.Sprintf("layer2_%d", i) + subnets, err := allocator.AllocateLayer2Subnet(owner) + g.Expect(err).ToNot(gomega.HaveOccurred()) + g.Expect(subnets).To(gomega.HaveLen(2)) // dual-stack: IPv4 + IPv6 + layer2Subnets[owner] = subnets + + // Verify IPv4 is /31 + ipv4Ones, _ := subnets[0].Mask.Size() + g.Expect(ipv4Ones).To(gomega.Equal(31)) + + // Verify IPv6 is /127 + ipv6Ones, _ := subnets[1].Mask.Size() + g.Expect(ipv6Ones).To(gomega.Equal(127)) + } + + // All 20 allocations should have succeeded + g.Expect(layer2Subnets).To(gomega.HaveLen(20)) + + // Verify IPv4 subnets span multiple /28 blocks (i.e., expansion happened) + ipv4BlocksSeen := make(map[string]bool) + for _, subnets := range layer2Subnets { + ip := subnets[0].IP.To4() + blockStart := ip[3] & 0xF0 // mask to /28 boundary + blockKey := fmt.Sprintf("192.168.0.%d/28", blockStart) + ipv4BlocksSeen[blockKey] = true + } + // With 20 /31 allocations, we need at least 3 /28 blocks (8 + 8 + 4) + g.Expect(len(ipv4BlocksSeen)).To(gomega.BeNumerically(">=", 3), + "Expected at least 3 IPv4 /28 blocks to be used, got %d: %v", len(ipv4BlocksSeen), ipv4BlocksSeen) + + // Verify IPv6 subnets span multiple /124 blocks (i.e., expansion happened) + ipv6BlocksSeen := make(map[string]bool) + for _, subnets := range layer2Subnets { + ip := subnets[1].IP.To16() + // For /124, the block boundary is at the last nibble (4 bits) + blockStart := ip[15] & 0xF0 // mask to /124 boundary + blockKey := fmt.Sprintf("fd00::%x/124", blockStart) + ipv6BlocksSeen[blockKey] = true + } + // With 20 /127 allocations, we need at least 3 /124 blocks (8 + 8 + 4) + g.Expect(len(ipv6BlocksSeen)).To(gomega.BeNumerically(">=", 3), + "Expected at least 3 IPv6 /124 blocks to be used, got %d: %v", len(ipv6BlocksSeen), ipv6BlocksSeen) +} + +func TestHybridConnectSubnetAllocator_Layer3RangeFull(t *testing.T) { + g := gomega.NewWithT(t) + + config.IPv4Mode = true + config.IPv6Mode = false + + // Use a very small CIDR that can only hold 4 /26 subnets + // 192.168.0.0/24 with /26 prefix = 4 subnets (256 IPs / 64 IPs per /26 = 4) + connectSubnets := []networkconnectv1.ConnectSubnet{ + { + CIDR: networkconnectv1.CIDR("192.168.0.0/24"), + NetworkPrefix: 26, + }, + } + allocator, err := NewHybridConnectSubnetAllocator(connectSubnets, "test-cnc") + g.Expect(err).ToNot(gomega.HaveOccurred()) + + // Allocate all 4 available /26 subnets + for i := 1; i <= 4; i++ { + owner := fmt.Sprintf("layer3_%d", i) + subnets, err := allocator.AllocateLayer3Subnet(owner) + g.Expect(err).ToNot(gomega.HaveOccurred()) + g.Expect(subnets).To(gomega.HaveLen(1)) + + // Verify 
it's a /26 + ones, _ := subnets[0].Mask.Size() + g.Expect(ones).To(gomega.Equal(26)) + } + + // 5th allocation should fail - range is exhausted + _, err = allocator.AllocateLayer3Subnet("layer3_5") + g.Expect(err).To(gomega.HaveOccurred()) + g.Expect(err.Error()).To(gomega.ContainSubstring("Layer3 allocation failed")) +} + +func TestHybridConnectSubnetAllocator_Layer2RangeFullAfterLayer3Exhausted(t *testing.T) { + g := gomega.NewWithT(t) + + config.IPv4Mode = true + config.IPv6Mode = true + + // Use very small CIDRs that can only hold 2 subnets each + // IPv4: 192.168.0.0/24 with /25 prefix = 2 subnets + // IPv6: fd00::/121 with /122 prefix = 2 subnets + connectSubnets := []networkconnectv1.ConnectSubnet{ + { + CIDR: networkconnectv1.CIDR("192.168.0.0/24"), + NetworkPrefix: 25, + }, + { + CIDR: networkconnectv1.CIDR("fd00::/114"), + NetworkPrefix: 121, // matches ipv4 /25: 32-25=7, 128-7=121 + }, + } + allocator, err := NewHybridConnectSubnetAllocator(connectSubnets, "test-cnc") + g.Expect(err).ToNot(gomega.HaveOccurred()) + + // Allocate both subnets as layer3 (exhausts both IPv4 and IPv6 ranges) + for i := 1; i <= 2; i++ { + owner := fmt.Sprintf("layer3_%d", i) + subnets, err := allocator.AllocateLayer3Subnet(owner) + g.Expect(err).ToNot(gomega.HaveOccurred()) + g.Expect(subnets).To(gomega.HaveLen(2)) // dual-stack: one IPv4 + one IPv6 + } + + // Now try to allocate layer2 - this should fail because + // layer2 needs to expand by getting blocks from layer3, but layer3 is exhausted + _, err = allocator.AllocateLayer2Subnet("layer2_1") + g.Expect(err).To(gomega.HaveOccurred()) + g.Expect(err.Error()).To(gomega.ContainSubstring("failed to expand Layer2 allocator")) +} + +func TestHybridConnectSubnetAllocator_Layer2CanReuseFromEarlierRange(t *testing.T) { + // This test confirms that when the layer2 allocator has multiple ranges (blocks), + // it can reuse a released slot from an EARLIER range, not just the most recent one. + // This is important for understanding block release behavior. 
+ g := gomega.NewWithT(t) + + config.IPv4Mode = true + config.IPv6Mode = false + + // Small range: 192.168.0.0/26 with /28 prefix = 4 /28 blocks + // Each /28 block has 8 /31 slots + connectSubnets := []networkconnectv1.ConnectSubnet{ + { + CIDR: networkconnectv1.CIDR("192.168.0.0/26"), + NetworkPrefix: 28, + }, + } + allocator, err := NewHybridConnectSubnetAllocator(connectSubnets, "test-cnc") + g.Expect(err).ToNot(gomega.HaveOccurred()) + + // Allocate 8 layer2 networks - this fills the first /28 block + // First allocation triggers expansion, gets 192.168.0.0/28 + firstBlockOwners := make([]string, 8) + for i := 0; i < 8; i++ { + owner := fmt.Sprintf("layer2_block1_%d", i) + firstBlockOwners[i] = owner + subnets, err := allocator.AllocateLayer2Subnet(owner) + g.Expect(err).ToNot(gomega.HaveOccurred()) + g.Expect(subnets).To(gomega.HaveLen(1)) + // All should be in 192.168.0.0/28 range (192.168.0.0 - 192.168.0.15) + g.Expect(subnets[0].IP[3]).To(gomega.BeNumerically("<", 16)) + } + + // Allocate one more - this triggers expansion to second /28 block (192.168.0.16/28) + secondBlockOwner := "layer2_block2_0" + subnets, err := allocator.AllocateLayer2Subnet(secondBlockOwner) + g.Expect(err).ToNot(gomega.HaveOccurred()) + g.Expect(subnets).To(gomega.HaveLen(1)) + // Should be in second block (192.168.0.16/28 range) + g.Expect(subnets[0].String()).To(gomega.Equal("192.168.0.16/31")) + + // Now release one from the FIRST block + allocator.ReleaseLayer2Subnet(firstBlockOwners[0]) // releases 192.168.0.0/31 + + // Allocate again - should reuse the released slot from the FIRST block + // (not allocate from the second block which also has free slots) + newOwner := "layer2_new" + subnets, err = allocator.AllocateLayer2Subnet(newOwner) + g.Expect(err).ToNot(gomega.HaveOccurred()) + g.Expect(subnets).To(gomega.HaveLen(1)) + + // The key assertion: the allocation should reuse 192.168.0.0/31 from first block + // This confirms the allocator can pick from earlier ranges, not just the latest + g.Expect(subnets[0].String()).To(gomega.Equal("192.168.0.0/31")) +} + +func TestHybridConnectSubnetAllocator_Layer2ReleaseReleasesBlockToLayer3(t *testing.T) { + // Test that blocks are released back to layer3 only when ALL layer2 owners + // in that block are released. Also verifies the range is removed from layer2Allocator. + // This test covers: + // 1. Partial release (block NOT released back to layer3) + // 2. 
Full release (block released back to layer3 AND removed from layer2Allocator) + g := gomega.NewWithT(t) + + config.IPv4Mode = true + config.IPv6Mode = true + + connectSubnets := []networkconnectv1.ConnectSubnet{ + { + CIDR: networkconnectv1.CIDR("10.100.0.0/26"), + NetworkPrefix: 28, // 32-28=4 host bits + }, + { + CIDR: networkconnectv1.CIDR("fd00::/122"), + NetworkPrefix: 124, // 128-124=4 host bits (matches IPv4) + }, + } + allocator, err := NewHybridConnectSubnetAllocator(connectSubnets, "test-cnc") + g.Expect(err).ToNot(gomega.HaveOccurred()) + // Small ranges: 4 blocks each + // IPv4: 10.100.0.0/26 with /28 prefix = 4 /28 blocks (each has 8 /31 slots) + // IPv6: fd00::/122 with /124 prefix = 4 /124 blocks (each has 8 /127 slots) + + // Allocate 2 layer2 networks - both in the same block + l2Sub1, err := allocator.AllocateLayer2Subnet("layer2_1") + g.Expect(err).ToNot(gomega.HaveOccurred()) + g.Expect(l2Sub1).To(gomega.HaveLen(2)) // dual-stack + g.Expect(l2Sub1[0].String()).To(gomega.Equal("10.100.0.0/31")) + // /124 to /127: subnetBits = 3, which is < 16, so it doesn't skip subnet 0 + g.Expect(l2Sub1[1].String()).To(gomega.Equal("fd00::/127")) + + l2Sub2, err := allocator.AllocateLayer2Subnet("layer2_2") + g.Expect(err).ToNot(gomega.HaveOccurred()) + g.Expect(l2Sub2).To(gomega.HaveLen(2)) // dual-stack + + // Verify layer2 allocator has 1 range each (the block) + v4RangeCount, v6RangeCount := allocator.Layer2RangeCount() + g.Expect(v4RangeCount).To(gomega.Equal(uint64(1))) + g.Expect(v6RangeCount).To(gomega.Equal(uint64(1))) + + // Use up the remaining layer3 blocks + _, err = allocator.AllocateLayer3Subnet("layer3_1") + g.Expect(err).ToNot(gomega.HaveOccurred()) + _, err = allocator.AllocateLayer3Subnet("layer3_2") + g.Expect(err).ToNot(gomega.HaveOccurred()) + _, err = allocator.AllocateLayer3Subnet("layer3_3") + g.Expect(err).ToNot(gomega.HaveOccurred()) + + // Layer3 should be full now (1 block used by layer2 networks + 3 blocks used by layer3) + _, err = allocator.AllocateLayer3Subnet("layer3_should_fail") + g.Expect(err).To(gomega.HaveOccurred()) + g.Expect(err.Error()).To(gomega.ContainSubstring("Layer3 allocation failed")) + + // PARTIAL RELEASE: Release only ONE layer2 network - block should NOT be released + allocator.ReleaseLayer2Subnet("layer2_1") + + // Layer2 allocator should still have 1 range each (block not released yet) + v4RangeCount, v6RangeCount = allocator.Layer2RangeCount() + g.Expect(v4RangeCount).To(gomega.Equal(uint64(1))) + g.Expect(v6RangeCount).To(gomega.Equal(uint64(1))) + + // Layer3 should still be full (block not released because layer2_2 still using it) + _, err = allocator.AllocateLayer3Subnet("layer3_still_full") + g.Expect(err).To(gomega.HaveOccurred()) + g.Expect(err.Error()).To(gomega.ContainSubstring("Layer3 allocation failed")) + + // FULL RELEASE: Release the other layer2 network - block should now be released + allocator.ReleaseLayer2Subnet("layer2_2") + + // Layer2 allocator should now have 0 ranges (block removed via FreeUnusedRanges) + v4RangeCount, v6RangeCount = allocator.Layer2RangeCount() + g.Expect(v4RangeCount).To(gomega.Equal(uint64(0))) + g.Expect(v6RangeCount).To(gomega.Equal(uint64(0))) + + // Now layer3 should have a free block (the block was released back) + l3Sub4, err := allocator.AllocateLayer3Subnet("layer3_4") + g.Expect(err).ToNot(gomega.HaveOccurred()) + g.Expect(l3Sub4).To(gomega.HaveLen(2)) // dual-stack + // Should get the block that was released from layer2 networks + 
g.Expect(l3Sub4[0].String()).To(gomega.Equal("10.100.0.0/28")) + g.Expect(l3Sub4[1].String()).To(gomega.Equal("fd00::/124")) +} + +func TestHybridConnectSubnetAllocator_Layer2DesyncBugWithMismatchedNetworkPrefix(t *testing.T) { + // BUG DEMONSTRATION: When IPv4 and IPv6 have different networkPrefix "host bits", + // they have different capacities per block. When one fills up before the other, + // the allocator expands but the next allocation gets IPv4 from new block and + // IPv6 from old block (still has room). This causes desync and breaks block release. + // + // Formula for matching: 32 - v4NetworkPrefix == 128 - v6NetworkPrefix + // This test intentionally uses MISMATCHED prefixes to demonstrate the bug. + // We have added CEL validation for this on the API so that its not possible, but + // this test is left here for reference and to ensure we don't use the allocator + // in this fashion. + t.Skip("This test demonstrates the desync bug that CEL validation prevents - skipped in CI but kept for documentation") + g := gomega.NewWithT(t) + + config.IPv4Mode = true + config.IPv6Mode = true + + // MISMATCHED networkPrefix values: + // IPv4 /28: host bits = 32 - 28 = 4 → 2^4 / 2 = 8 /31 slots per block + // IPv6 /123: host bits = 128 - 123 = 5 → 2^5 / 2 = 16 /127 slots per block + // IPv4 will fill up FIRST! + // + // Use SMALL CIDRs so we can detect leaks: + // IPv4: /26 with /28 = 4 blocks only + // IPv6: /121 with /123 = 4 blocks only + connectSubnets := []networkconnectv1.ConnectSubnet{ + { + CIDR: networkconnectv1.CIDR("10.100.0.0/26"), // Only 4 /28 blocks + NetworkPrefix: 28, // 8 /31 slots per block + }, + { + CIDR: networkconnectv1.CIDR("fd00::/121"), // Only 4 /123 blocks + NetworkPrefix: 123, // 16 /127 slots per block + }, + } + allocator, err := NewHybridConnectSubnetAllocator(connectSubnets, "test-cnc") + g.Expect(err).ToNot(gomega.HaveOccurred()) + + // Allocate 9 L2 networks - this will exceed IPv4's 8 slots per block + // Networks 1-8: both from block1 + // Network 9: IPv4 from block2, IPv6 from block1 (DESYNC!) + l2Owners := make([]string, 9) + for i := 0; i < 9; i++ { + owner := fmt.Sprintf("layer2_%d", i) + l2Owners[i] = owner + subnets, err := allocator.AllocateLayer2Subnet(owner) + g.Expect(err).ToNot(gomega.HaveOccurred()) + g.Expect(subnets).To(gomega.HaveLen(2)) // dual-stack + + if i == 8 { + // Network 9 (index 8) should show the desync: + // IPv4: 10.100.0.16/31 (first slot in block2 = 10.100.0.16/28) + // IPv6: fd00::10/127 (slot 9 in block1 = fd00::/123) + t.Logf("Network 9 (desynced): IPv4=%s, IPv6=%s", subnets[0].String(), subnets[1].String()) + + // IPv4 should be from block2 (10.100.0.16/28) + g.Expect(subnets[0].String()).To(gomega.Equal("10.100.0.16/31")) + // IPv6 should still be from block1 (fd00::/123) - index 8 + g.Expect(subnets[1].String()).To(gomega.Equal("fd00::10/127")) + } + } + + // Both families have 2 blocks because expandLayer2Allocator() adds blocks for both + // But the ALLOCATION is desynced: IPv4 from block2, IPv6 from block1 + v4RangeCount, v6RangeCount := allocator.Layer2RangeCount() + g.Expect(v4RangeCount).To(gomega.Equal(uint64(2)), "IPv4 should have 2 blocks") + g.Expect(v6RangeCount).To(gomega.Equal(uint64(2)), "IPv6 should have 2 blocks (both added, but only block1 used)") + + // Now release all L2 networks and watch the bug manifest + // When we release networks 1-8, block1 should be freed for both families + // But network 9's IPv4 is in block2 alone - the release will fail to find the owner! 
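// Editor's note (illustrative; inferred from the allocations asserted above, not from the allocator internals):
// networks 0-7 land in the parent-block pair (10.100.0.0/28, fd00::/123), while network 9 (index 8) lands in
// (10.100.0.16/28, fd00::/123). If block ownership is tracked with a combined dual-stack key, as the
// getL2BlocksKey comments in the MarkAllocatedSubnets test suggest, those two keys disagree on the IPv4 half,
// so the releases below presumably cannot hand both 10.100.0.0/28 and 10.100.0.16/28 cleanly back to layer3,
// which is exactly the leak the Layer3Usage() check at the end of this test looks for.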
+ for i := 0; i < 9; i++ { + allocator.ReleaseLayer2Subnet(l2Owners[i]) + } + + // Layer2 allocator removes ranges via FreeUnusedRanges() regardless of layer3 release success + v4RangeCount, v6RangeCount = allocator.Layer2RangeCount() + t.Logf("After release - layer2 IPv4 ranges: %d, IPv6 ranges: %d", v4RangeCount, v6RangeCount) + g.Expect(v4RangeCount).To(gomega.Equal(uint64(0))) + g.Expect(v6RangeCount).To(gomega.Equal(uint64(0))) + + // THE REAL BUG: Check layer3 allocator for leaked blocks via Usage() + // L2 used 2 blocks during allocation (block1 for networks 0-7, block2 for network 8's IPv4) + // If properly released, layer3 should have 0 allocated blocks + // If leaked, some blocks are still marked as allocated + l3v4Usage, l3v6Usage := allocator.Layer3Usage() + t.Logf("Layer3 usage - IPv4: %d, IPv6: %d (expected 0 each if no leak)", l3v4Usage, l3v6Usage) + + // BUG: Usage should be 0 after releasing all L2 networks + // If blocks leaked (weren't released back to layer3), usage will be > 0 + g.Expect(l3v4Usage).To(gomega.Equal(uint64(0)), "BUG: IPv4 blocks leaked in layer3 - usage should be 0 but got %d", l3v4Usage) + g.Expect(l3v6Usage).To(gomega.Equal(uint64(0)), "BUG: IPv6 blocks leaked in layer3 - usage should be 0 but got %d", l3v6Usage) +} + +func TestHybridConnectSubnetAllocator_getParentBlockCIDR(t *testing.T) { + // Test the mathematical derivation of parent block CIDR from a subnet + // The function masks the subnet IP to the networkPrefix boundary + tests := []struct { + name string + v4NetworkPrefix int + v6NetworkPrefix int + subnet string + expectedParent string + }{ + // IPv4 tests with /28 networkPrefix (using 10.0.0.0/8 range) + { + name: "IPv4 first address in block", + v4NetworkPrefix: 28, + subnet: "10.20.30.0/31", + expectedParent: "10.20.30.0/28", + }, + { + name: "IPv4 middle address in block", + v4NetworkPrefix: 28, + subnet: "10.20.30.6/31", + expectedParent: "10.20.30.0/28", + }, + { + name: "IPv4 last address in block", + v4NetworkPrefix: 28, + subnet: "10.20.30.14/31", + expectedParent: "10.20.30.0/28", + }, + { + name: "IPv4 second block first address", + v4NetworkPrefix: 28, + subnet: "10.20.30.16/31", + expectedParent: "10.20.30.16/28", + }, + { + name: "IPv4 second block middle address", + v4NetworkPrefix: 28, + subnet: "10.20.30.22/31", + expectedParent: "10.20.30.16/28", + }, + // IPv4 with /24 networkPrefix (using 172.16.0.0/12 range) + { + name: "IPv4 /24 prefix first block", + v4NetworkPrefix: 24, + subnet: "172.16.5.100/31", + expectedParent: "172.16.5.0/24", + }, + { + name: "IPv4 /24 prefix second block", + v4NetworkPrefix: 24, + subnet: "172.16.6.50/31", + expectedParent: "172.16.6.0/24", + }, + // IPv6 tests with /124 networkPrefix + { + name: "IPv6 first address in block", + v6NetworkPrefix: 124, + subnet: "2001:db8::0/127", + expectedParent: "2001:db8::/124", + }, + { + name: "IPv6 middle address in block", + v6NetworkPrefix: 124, + subnet: "2001:db8::6/127", + expectedParent: "2001:db8::/124", + }, + { + name: "IPv6 last address in block", + v6NetworkPrefix: 124, + subnet: "2001:db8::e/127", + expectedParent: "2001:db8::/124", + }, + { + name: "IPv6 second block", + v6NetworkPrefix: 124, + subnet: "2001:db8::10/127", + expectedParent: "2001:db8::10/124", + }, + { + name: "IPv6 second block middle", + v6NetworkPrefix: 124, + subnet: "2001:db8::1a/127", + expectedParent: "2001:db8::10/124", + }, + // IPv6 with /64 networkPrefix + { + name: "IPv6 /64 prefix", + v6NetworkPrefix: 64, + subnet: "2001:db8:cafe:1::abcd/127", + expectedParent: 
"2001:db8:cafe:1::/64", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := gomega.NewWithT(t) + + // Create allocator with the specified network prefixes + allocator := &hybridConnectSubnetAllocator{ + v4NetworkPrefix: tt.v4NetworkPrefix, + v6NetworkPrefix: tt.v6NetworkPrefix, + } + + subnet := mustParseCIDR(tt.subnet) + parentCIDR := allocator.getParentBlockCIDR(subnet) + + g.Expect(parentCIDR.String()).To(gomega.Equal(tt.expectedParent)) + }) + } +} + +func TestHybridConnectSubnetAllocator_getParentBlockCIDR_AllAddressesInBlockMapToSameParent(t *testing.T) { + // Verify that ALL addresses within a block map to the same parent + g := gomega.NewWithT(t) + + allocator := &hybridConnectSubnetAllocator{ + v4NetworkPrefix: 28, // /28 = 16 addresses (0-15) + } + + // All /31 subnets in 10.50.100.0/28 should map to the same parent + // Block has addresses .0 to .15, so /31 subnets are .0, .2, .4, .6, .8, .10, .12, .14 + expectedParent := "10.50.100.0/28" + for i := 0; i < 16; i += 2 { + subnet := mustParseCIDR(fmt.Sprintf("10.50.100.%d/31", i)) + parentCIDR := allocator.getParentBlockCIDR(subnet) + g.Expect(parentCIDR.String()).To(gomega.Equal(expectedParent), "Address 10.50.100.%d/31 should map to %s", i, expectedParent) + } + + // All /31 subnets in 10.50.100.16/28 should map to a different parent + expectedParent2 := "10.50.100.16/28" + for i := 16; i < 32; i += 2 { + subnet := mustParseCIDR(fmt.Sprintf("10.50.100.%d/31", i)) + parentCIDR := allocator.getParentBlockCIDR(subnet) + g.Expect(parentCIDR.String()).To(gomega.Equal(expectedParent2), "Address 10.50.100.%d/31 should map to %s", i, expectedParent2) + } +} + +// newAllocationCheck verifies new allocations don't conflict with marked subnets +type newAllocationCheck struct { + owner string + topology string // types.Layer3Topology or types.Layer2Topology + notIPv4 string // expected to NOT be this IPv4 CIDR + notIPv6 string // expected to NOT be this IPv6 CIDR +} + +func TestHybridConnectSubnetAllocator_MarkAllocatedSubnets(t *testing.T) { + tests := []struct { + name string + // ipv4Mode and ipv6Mode configure the IP mode + ipv4Mode bool + ipv6Mode bool + // allocatedSubnets is the map of owner -> subnets to mark as allocated + allocatedSubnets map[string][]*net.IPNet + // verifyAllocations checks that re-allocating returns exact same subnets + verifyAllocations []expectedSubnetAllocation + // verifyBlocks checks layer2 block state (expected block CIDRs in layer2BlockOwners) + verifyBlocks []string + // newAllocation verifies new allocations don't conflict + newAllocation *newAllocationCheck + }{ + { + name: "marks layer3 subnets - re-allocation returns same subnets", + ipv4Mode: true, + ipv6Mode: true, + allocatedSubnets: map[string][]*net.IPNet{ + // IPv6 /120 blocks within the fd00:10:244::/112 range + "layer3_1": {mustParseCIDR("192.168.0.0/24"), mustParseCIDR("fd00:10:244::/120")}, + "layer3_2": {mustParseCIDR("192.168.1.0/24"), mustParseCIDR("fd00:10:244::100/120")}, + }, + verifyAllocations: []expectedSubnetAllocation{ + {owner: "layer3_1", topology: types.Layer3Topology, ipv4: "192.168.0.0/24", ipv6: "fd00:10:244::/120"}, + {owner: "layer3_2", topology: types.Layer3Topology, ipv4: "192.168.1.0/24", ipv6: "fd00:10:244::100/120"}, + }, + newAllocation: &newAllocationCheck{ + owner: "layer3_3", + topology: types.Layer3Topology, + notIPv4: "192.168.0.0/24", + notIPv6: "fd00:10:244::/120", + }, + }, + { + name: "marks layer2 subnets - re-allocation returns same subnets and blocks are tracked", + 
ipv4Mode: true, + ipv6Mode: true, + allocatedSubnets: map[string][]*net.IPNet{ + // /127 subnets within the fd00:10:244::/120 block + "layer2_100": {mustParseCIDR("192.168.0.0/31"), mustParseCIDR("fd00:10:244::/127")}, + "layer2_101": {mustParseCIDR("192.168.0.2/31"), mustParseCIDR("fd00:10:244::2/127")}, + }, + verifyAllocations: []expectedSubnetAllocation{ + {owner: "layer2_100", topology: types.Layer2Topology, ipv4: "192.168.0.0/31", ipv6: "fd00:10:244::/127"}, + {owner: "layer2_101", topology: types.Layer2Topology, ipv4: "192.168.0.2/31", ipv6: "fd00:10:244::2/127"}, + }, + // In dual-stack, getL2BlocksKey creates combined key "v4,v6" with parent blocks + verifyBlocks: []string{"192.168.0.0/24,fd00:10:244::/120"}, + newAllocation: &newAllocationCheck{ + owner: "layer2_102", + topology: types.Layer2Topology, + notIPv4: "192.168.0.0/31", + notIPv6: "fd00:10:244::/127", + }, + }, + { + name: "marks mixed layer3 and layer2 subnets", + ipv4Mode: true, + ipv6Mode: true, + allocatedSubnets: map[string][]*net.IPNet{ + // IPv6 /120 blocks within the fd00:10:244::/112 range + "layer3_5": {mustParseCIDR("192.168.0.0/24"), mustParseCIDR("fd00:10:244::/120")}, + "layer2_6": {mustParseCIDR("192.168.1.0/31"), mustParseCIDR("fd00:10:244::100/127")}, + }, + verifyAllocations: []expectedSubnetAllocation{ + {owner: "layer3_5", topology: types.Layer3Topology, ipv4: "192.168.0.0/24", ipv6: "fd00:10:244::/120"}, + {owner: "layer2_6", topology: types.Layer2Topology, ipv4: "192.168.1.0/31", ipv6: "fd00:10:244::100/127"}, + }, + // In dual-stack, getL2BlocksKey creates combined key "v4,v6" with parent blocks + verifyBlocks: []string{"192.168.1.0/24,fd00:10:244::100/120"}, + }, + { + name: "marks IPv4-only layer3 subnets", + ipv4Mode: true, + ipv6Mode: false, + allocatedSubnets: map[string][]*net.IPNet{ + // Use subnets from 192.168.0.0/16 range which is the first range added + "layer3_10": {mustParseCIDR("192.168.0.0/24")}, + "layer3_11": {mustParseCIDR("192.168.1.0/24")}, + }, + verifyAllocations: []expectedSubnetAllocation{ + {owner: "layer3_10", topology: types.Layer3Topology, ipv4: "192.168.0.0/24"}, + {owner: "layer3_11", topology: types.Layer3Topology, ipv4: "192.168.1.0/24"}, + }, + }, + { + name: "marks layer2 subnets from multiple blocks with ref count tracking", + ipv4Mode: true, + ipv6Mode: false, + allocatedSubnets: map[string][]*net.IPNet{ + // All from same block: 192.168.0.0/24 + // This tests that multiple owners share the same block + "layer2_1": {mustParseCIDR("192.168.0.0/31")}, + "layer2_2": {mustParseCIDR("192.168.0.2/31")}, + "layer2_3": {mustParseCIDR("192.168.0.4/31")}, + }, + verifyAllocations: []expectedSubnetAllocation{ + {owner: "layer2_1", topology: types.Layer2Topology, ipv4: "192.168.0.0/31"}, + {owner: "layer2_2", topology: types.Layer2Topology, ipv4: "192.168.0.2/31"}, + {owner: "layer2_3", topology: types.Layer2Topology, ipv4: "192.168.0.4/31"}, + }, + verifyBlocks: []string{"192.168.0.0/24"}, + }, + { + name: "handles empty allocatedSubnets", + ipv4Mode: true, + ipv6Mode: true, + allocatedSubnets: map[string][]*net.IPNet{}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := gomega.NewWithT(t) + + config.IPv4Mode = tt.ipv4Mode + config.IPv6Mode = tt.ipv6Mode + + connectSubnets := []networkconnectv1.ConnectSubnet{} + if tt.ipv4Mode { + connectSubnets = append(connectSubnets, networkconnectv1.ConnectSubnet{ + CIDR: networkconnectv1.CIDR("192.168.0.0/16"), + NetworkPrefix: 24, + }) + } + if tt.ipv6Mode { + connectSubnets = append(connectSubnets, 
networkconnectv1.ConnectSubnet{ + CIDR: networkconnectv1.CIDR("fd00:10:244::/112"), + NetworkPrefix: 120, // matches ipv4 /24: 32-24=8, 128-8=120 + }) + } + allocator, err := NewHybridConnectSubnetAllocator(connectSubnets, "test-cnc") + g.Expect(err).ToNot(gomega.HaveOccurred()) + + // Mark allocated subnets + err = allocator.MarkAllocatedSubnets(tt.allocatedSubnets) + g.Expect(err).ToNot(gomega.HaveOccurred()) + + // Verify re-allocations return exact same subnets + for _, verify := range tt.verifyAllocations { + var subnets []*net.IPNet + var err error + if verify.topology == types.Layer2Topology { + subnets, err = allocator.AllocateLayer2Subnet(verify.owner) + } else { + subnets, err = allocator.AllocateLayer3Subnet(verify.owner) + } + g.Expect(err).ToNot(gomega.HaveOccurred(), + "re-allocation for %s should succeed", verify.owner) + + // Build map of allocated by type + allocatedByType := make(map[string]string) + for _, subnet := range subnets { + if subnet.IP.To4() != nil { + allocatedByType["ipv4"] = subnet.String() + } else { + allocatedByType["ipv6"] = subnet.String() + } + } + + if verify.ipv4 != "" { + g.Expect(allocatedByType["ipv4"]).To(gomega.Equal(verify.ipv4), + "owner %s: IPv4 should match exactly", verify.owner) + } + if verify.ipv6 != "" { + g.Expect(allocatedByType["ipv6"]).To(gomega.Equal(verify.ipv6), + "owner %s: IPv6 should match exactly", verify.owner) + } + } + + // Verify block tracking for layer2 + if len(tt.verifyBlocks) > 0 { + hca := allocator.(*hybridConnectSubnetAllocator) + + // Verify all expected blocks exist with proper owner names + for _, expectedKey := range tt.verifyBlocks { + blockOwner, exists := hca.layer2BlockOwners[expectedKey] + g.Expect(exists).To(gomega.BeTrue(), + "block %s should be tracked", expectedKey) + g.Expect(blockOwner).To(gomega.HavePrefix("l2-block-"), + "block %s should have a l2-block- prefix owner", expectedKey) + } + } + + // Verify new allocation doesn't conflict + if tt.newAllocation != nil { + var subnets []*net.IPNet + var err error + if tt.newAllocation.topology == types.Layer2Topology { + subnets, err = allocator.AllocateLayer2Subnet(tt.newAllocation.owner) + } else { + subnets, err = allocator.AllocateLayer3Subnet(tt.newAllocation.owner) + } + g.Expect(err).ToNot(gomega.HaveOccurred()) + g.Expect(subnets).ToNot(gomega.BeEmpty()) + + for _, subnet := range subnets { + if subnet.IP.To4() != nil && tt.newAllocation.notIPv4 != "" { + g.Expect(subnet.String()).ToNot(gomega.Equal(tt.newAllocation.notIPv4), + "new allocation should not get %s", tt.newAllocation.notIPv4) + } + if subnet.IP.To4() == nil && tt.newAllocation.notIPv6 != "" { + g.Expect(subnet.String()).ToNot(gomega.Equal(tt.newAllocation.notIPv6), + "new allocation should not get %s", tt.newAllocation.notIPv6) + } + } + } + }) + } +} + +// TestHybridConnectSubnetAllocator_AfterMarkAllocatedSubnets_ReleaseWorks tests that after +// marking layer2 subnets, the release path works correctly and releases blocks +// back to layer3 when all owners are released. 
+func TestHybridConnectSubnetAllocator_AfterMarkAllocatedSubnets_ReleaseWorks(t *testing.T) { + g := gomega.NewWithT(t) + + config.IPv4Mode = true + config.IPv6Mode = false + + connectSubnets := []networkconnectv1.ConnectSubnet{ + { + CIDR: networkconnectv1.CIDR("192.168.0.0/16"), + NetworkPrefix: 24, + }, + } + // Initialize with range + allocator, err := NewHybridConnectSubnetAllocator(connectSubnets, "test-cnc") + g.Expect(err).ToNot(gomega.HaveOccurred()) + hca := allocator.(*hybridConnectSubnetAllocator) + + // Verify layer3 allocator starts with 0 usage + v4used, _ := hca.layer3Allocator.Usage() + g.Expect(v4used).To(gomega.Equal(uint64(0)), "layer3 allocator should start with 0 usage") + + // Mark two layer2 subnets from the same block + allocatedSubnets := map[string][]*net.IPNet{ + "layer2_1": {mustParseCIDR("192.168.0.0/31")}, + "layer2_2": {mustParseCIDR("192.168.0.2/31")}, + } + err = allocator.MarkAllocatedSubnets(allocatedSubnets) + g.Expect(err).ToNot(gomega.HaveOccurred()) + + // Verify layer3 allocator now has 1 block used (the block for layer2) + v4used, _ = hca.layer3Allocator.Usage() + g.Expect(v4used).To(gomega.Equal(uint64(1)), "layer3 allocator should have 1 block used for layer2 block") + + // Verify initial state + blockCIDR := "192.168.0.0/24" + pbOwner := hca.layer2BlockOwners[blockCIDR] + g.Expect(pbOwner).ToNot(gomega.BeEmpty(), "block owner should be set") + g.Expect(pbOwner).To(gomega.HavePrefix("l2-block-"), "block owner should have l2-block- prefix") + // Store the initial block owner (could be layer2_1 or layer2_2 depending on map iteration order) + initialblockOwner := pbOwner + + // Release first owner - block should still exist, layer3 usage unchanged + allocator.ReleaseLayer2Subnet("layer2_1") + g.Expect(hca.layer2BlockOwners[blockCIDR]).ToNot(gomega.BeEmpty(), "block should still exist after first release") + g.Expect(hca.layer2BlockOwners[blockCIDR]).To(gomega.Equal(initialblockOwner), "block owner should remain the same after first release") + + // Verify layer3 allocator still has 1 block used (block not released yet) + v4used, _ = hca.layer3Allocator.Usage() + g.Expect(v4used).To(gomega.Equal(uint64(1)), "layer3 allocator should still have 1 block used after first release") + + // Release second owner - block should be released back to layer3 + allocator.ReleaseLayer2Subnet("layer2_2") + _, exists := hca.layer2BlockOwners[blockCIDR] + g.Expect(exists).To(gomega.BeFalse(), "block should be removed after all owners released") + + // Verify layer3 allocator now has 0 blocks used (block released) + v4used, _ = hca.layer3Allocator.Usage() + g.Expect(v4used).To(gomega.Equal(uint64(0)), "layer3 allocator should have 0 blocks after all layer2 owners released") + + // Now the block should be available for new layer3 allocation + // A new layer3 allocation should get 192.168.0.0/24 (the released block) + subnets, err := allocator.AllocateLayer3Subnet("layer3_new") + g.Expect(err).ToNot(gomega.HaveOccurred()) + g.Expect(subnets).To(gomega.HaveLen(1)) + g.Expect(subnets[0].String()).To(gomega.Equal("192.168.0.0/24"), + "released block should be available for layer3 allocation") + + // Verify layer3 allocator now has 1 block used again + v4used, _ = hca.layer3Allocator.Usage() + g.Expect(v4used).To(gomega.Equal(uint64(1)), "layer3 allocator should have 1 block after new layer3 allocation") +} diff --git a/go-controller/pkg/clustermanager/node/subnet_allocator.go b/go-controller/pkg/clustermanager/node/subnet_allocator.go index 02cdca711a..0123125b59 100644 --- 
a/go-controller/pkg/clustermanager/node/subnet_allocator.go +++ b/go-controller/pkg/clustermanager/node/subnet_allocator.go @@ -3,6 +3,7 @@ package node import ( "fmt" "net" + "slices" "sync" "k8s.io/klog/v2" @@ -20,6 +21,8 @@ type SubnetAllocator interface { Usage() (uint64, uint64) // Count returns the number available (both used and unused) v4 and v6 subnets Count() (uint64, uint64) + // RangeCount returns the number of v4 and v6 ranges configured in the allocator + RangeCount() (uint64, uint64) AllocateNetworks(string) ([]*net.IPNet, error) AllocateIPv4Network(string) (*net.IPNet, error) AllocateIPv6Network(string) (*net.IPNet, error) @@ -30,6 +33,8 @@ type SubnetAllocator interface { ReleaseNetworks(string, ...*net.IPNet) error // ReleaseAllNetworks releases all networks owned by the given owner ReleaseAllNetworks(string) + // FreeUnusedRanges removes all ranges that have no allocations from the allocator and returns their networks + FreeUnusedRanges() []*net.IPNet } type BaseSubnetAllocator struct { @@ -45,6 +50,13 @@ func NewSubnetAllocator() SubnetAllocator { return &BaseSubnetAllocator{} } +// RangeCount returns the number of v4 and v6 ranges configured in the allocator +func (sna *BaseSubnetAllocator) RangeCount() (uint64, uint64) { + sna.Lock() + defer sna.Unlock() + return uint64(len(sna.v4ranges)), uint64(len(sna.v6ranges)) +} + // Usage returns the number of used/allocated v4 and v6 subnets func (sna *BaseSubnetAllocator) Usage() (uint64, uint64) { sna.Lock() @@ -222,6 +234,27 @@ func (sna *BaseSubnetAllocator) ReleaseAllNetworks(owner string) { sna.releaseAllNetworks(owner) } +func (sna *BaseSubnetAllocator) FreeUnusedRanges() []*net.IPNet { + sna.Lock() + defer sna.Unlock() + var freedSubnets []*net.IPNet + sna.v4ranges = slices.DeleteFunc(sna.v4ranges, func(snr *subnetAllocatorRange) bool { + if snr.usage() == 0 { + freedSubnets = append(freedSubnets, snr.network) + return true + } + return false + }) + sna.v6ranges = slices.DeleteFunc(sna.v6ranges, func(snr *subnetAllocatorRange) bool { + if snr.usage() == 0 { + freedSubnets = append(freedSubnets, snr.network) + return true + } + return false + }) + return freedSubnets +} + // releaseNetworks attempts to release all given subnets, even if a failure // occurs during release. It returns nil, or an aggregate error for any // failures that occurred.
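As context for how the two helpers added to SubnetAllocator above are meant to compose, here is a minimal, illustrative sketch (not code from this change) of how a caller such as the hybrid connect-subnet allocator exercised in the tests could hand fully drained layer2 blocks back to a parent layer3 allocator. The releaseLayer2Owner helper and the blockOwner lookup are assumptions made for the example; only ReleaseAllNetworks, FreeUnusedRanges, and ReleaseNetworks come from the interface shown in the hunk above, and the sketch is assumed to live in the same node package, which already imports "fmt" and "net".

// Illustrative sketch only - not part of this change.
func releaseLayer2Owner(layer2, layer3 SubnetAllocator, owner string, blockOwner func(*net.IPNet) string) error {
	// Drop the owner's small (/31 or /127) subnets from the layer2 allocator.
	layer2.ReleaseAllNetworks(owner)

	// Remove any layer2 range whose usage fell to zero; each freed range is a
	// block that can be returned to the parent (layer3) allocator under the
	// owner name it was originally allocated with (blockOwner is assumed here).
	for _, block := range layer2.FreeUnusedRanges() {
		if err := layer3.ReleaseNetworks(blockOwner(block), block); err != nil {
			return fmt.Errorf("failed to release block %s back to layer3: %w", block, err)
		}
	}
	return nil
}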
diff --git a/go-controller/pkg/config/config.go b/go-controller/pkg/config/config.go index 886682ea3d..42d8764cb2 100644 --- a/go-controller/pkg/config/config.go +++ b/go-controller/pkg/config/config.go @@ -461,6 +461,7 @@ type OVNKubernetesFeatureConfig struct { EgressIPNodeHealthCheckPort int `gcfg:"egressip-node-healthcheck-port"` EnableMultiNetwork bool `gcfg:"enable-multi-network"` EnableNetworkSegmentation bool `gcfg:"enable-network-segmentation"` + EnableNetworkConnect bool `gcfg:"enable-network-connect"` EnablePreconfiguredUDNAddresses bool `gcfg:"enable-preconfigured-udn-addresses"` EnableRouteAdvertisements bool `gcfg:"enable-route-advertisements"` EnableMultiNetworkPolicy bool `gcfg:"enable-multi-networkpolicy"` @@ -673,8 +674,6 @@ var ( initGateways bool // legacy gateway-local CLI option gatewayLocal bool - // legacy disable-ovn-iface-id-ver CLI option - disableOVNIfaceIDVer bool ) func init() { @@ -1153,6 +1152,12 @@ var OVNK8sFeatureFlags = []cli.Flag{ Destination: &cliConfig.OVNKubernetesFeature.EnableNetworkSegmentation, Value: OVNKubernetesFeature.EnableNetworkSegmentation, }, + &cli.BoolFlag{ + Name: "enable-network-connect", + Usage: "Configure to use network connect feature with ovn-kubernetes.", + Destination: &cliConfig.OVNKubernetesFeature.EnableNetworkConnect, + Value: OVNKubernetesFeature.EnableNetworkConnect, + }, &cli.BoolFlag{ Name: "enable-preconfigured-udn-addresses", Usage: "Enable workloads connect to user-defined network with preconfigured addresses.", @@ -1724,11 +1729,6 @@ var OvnKubeNodeFlags = []cli.Flag{ Value: OvnKubeNode.MgmtPortDPResourceName, Destination: &cliConfig.OvnKubeNode.MgmtPortDPResourceName, }, - &cli.BoolFlag{ - Name: "disable-ovn-iface-id-ver", - Usage: "Deprecated; iface-id-ver is always enabled", - Destination: &disableOVNIfaceIDVer, - }, } // ClusterManagerFlags captures ovnkube-cluster-manager specific configurations diff --git a/go-controller/pkg/config/config_test.go b/go-controller/pkg/config/config_test.go index 9a7d215d04..6127dff90e 100644 --- a/go-controller/pkg/config/config_test.go +++ b/go-controller/pkg/config/config_test.go @@ -234,6 +234,7 @@ egressip-node-healthcheck-port=1234 enable-multi-network=false enable-multi-networkpolicy=false enable-network-segmentation=false +enable-network-connect=false enable-preconfigured-udn-addresses=false enable-route-advertisements=false advertised-udn-isolation-mode=strict @@ -347,6 +348,7 @@ var _ = Describe("Config Operations", func() { gomega.Expect(OVNKubernetesFeature.EgressIPNodeHealthCheckPort).To(gomega.Equal(0)) gomega.Expect(OVNKubernetesFeature.EnableMultiNetwork).To(gomega.BeFalse()) gomega.Expect(OVNKubernetesFeature.EnableNetworkSegmentation).To(gomega.BeFalse()) + gomega.Expect(OVNKubernetesFeature.EnableNetworkConnect).To(gomega.BeFalse()) gomega.Expect(OVNKubernetesFeature.EnablePreconfiguredUDNAddresses).To(gomega.BeFalse()) gomega.Expect(OVNKubernetesFeature.EnableRouteAdvertisements).To(gomega.BeFalse()) gomega.Expect(OVNKubernetesFeature.EnableMultiNetworkPolicy).To(gomega.BeFalse()) @@ -608,6 +610,7 @@ var _ = Describe("Config Operations", func() { "enable-multi-network=true", "enable-multi-networkpolicy=true", "enable-network-segmentation=true", + "enable-network-connect=true", "enable-preconfigured-udn-addresses=true", "enable-route-advertisements=true", "advertised-udn-isolation-mode=loose", @@ -704,6 +707,7 @@ var _ = Describe("Config Operations", func() { gomega.Expect(OVNKubernetesFeature.EgressIPNodeHealthCheckPort).To(gomega.Equal(1234)) 
gomega.Expect(OVNKubernetesFeature.EnableMultiNetwork).To(gomega.BeTrue()) gomega.Expect(OVNKubernetesFeature.EnableNetworkSegmentation).To(gomega.BeTrue()) + gomega.Expect(OVNKubernetesFeature.EnableNetworkConnect).To(gomega.BeTrue()) gomega.Expect(OVNKubernetesFeature.EnablePreconfiguredUDNAddresses).To(gomega.BeTrue()) gomega.Expect(OVNKubernetesFeature.EnableRouteAdvertisements).To(gomega.BeTrue()) gomega.Expect(OVNKubernetesFeature.AdvertisedUDNIsolationMode).To(gomega.Equal(AdvertisedUDNIsolationModeLoose)) @@ -815,6 +819,7 @@ var _ = Describe("Config Operations", func() { gomega.Expect(OVNKubernetesFeature.EgressIPNodeHealthCheckPort).To(gomega.Equal(4321)) gomega.Expect(OVNKubernetesFeature.EnableMultiNetwork).To(gomega.BeTrue()) gomega.Expect(OVNKubernetesFeature.EnableNetworkSegmentation).To(gomega.BeTrue()) + gomega.Expect(OVNKubernetesFeature.EnableNetworkConnect).To(gomega.BeTrue()) gomega.Expect(OVNKubernetesFeature.EnablePreconfiguredUDNAddresses).To(gomega.BeTrue()) gomega.Expect(OVNKubernetesFeature.EnableRouteAdvertisements).To(gomega.BeTrue()) gomega.Expect(OVNKubernetesFeature.AdvertisedUDNIsolationMode).To(gomega.Equal(AdvertisedUDNIsolationModeLoose)) @@ -892,6 +897,7 @@ var _ = Describe("Config Operations", func() { "-enable-multi-network=true", "-enable-multi-networkpolicy=true", "-enable-network-segmentation=true", + "-enable-network-connect=true", "-enable-preconfigured-udn-addresses=true", "-enable-route-advertisements=true", "-advertised-udn-isolation-mode=loose", diff --git a/go-controller/pkg/crd/clusternetworkconnect/v1/types.go b/go-controller/pkg/crd/clusternetworkconnect/v1/types.go index bb9242b06d..1f159ba122 100644 --- a/go-controller/pkg/crd/clusternetworkconnect/v1/types.go +++ b/go-controller/pkg/crd/clusternetworkconnect/v1/types.go @@ -77,6 +77,7 @@ type ClusterNetworkConnectSpec struct { // +required // +kubebuilder:validation:XValidation:rule="self == oldSelf", message="connectSubnets is immutable" // +kubebuilder:validation:XValidation:rule="size(self) != 2 || !isCIDR(self[0].cidr) || !isCIDR(self[1].cidr) || cidr(self[0].cidr).ip().family() != cidr(self[1].cidr).ip().family()", message="When 2 CIDRs are set, they must be from different IP families" + // +kubebuilder:validation:XValidation:rule="size(self) != 2 || !isCIDR(self[0].cidr) || !isCIDR(self[1].cidr) || cidr(self[0].cidr).ip().family() == cidr(self[1].cidr).ip().family() || (cidr(self[0].cidr).ip().family() == 4 ? (32 - self[0].networkPrefix) == (128 - self[1].networkPrefix) : (128 - self[0].networkPrefix) == (32 - self[1].networkPrefix))", message="For dual-stack, networkPrefix must have matching host bits: (32 - ipv4NetworkPrefix) must equal (128 - ipv6NetworkPrefix)" ConnectSubnets []ConnectSubnet `json:"connectSubnets"` // connectivity specifies which connectivity types should be enabled for the connected networks. diff --git a/go-controller/pkg/crd/userdefinednetwork/v1/apis/applyconfiguration/userdefinednetwork/v1/networkspec.go b/go-controller/pkg/crd/userdefinednetwork/v1/apis/applyconfiguration/userdefinednetwork/v1/networkspec.go index ea298e1e6c..fa3320e318 100644 --- a/go-controller/pkg/crd/userdefinednetwork/v1/apis/applyconfiguration/userdefinednetwork/v1/networkspec.go +++ b/go-controller/pkg/crd/userdefinednetwork/v1/apis/applyconfiguration/userdefinednetwork/v1/networkspec.go @@ -24,10 +24,12 @@ import ( // NetworkSpecApplyConfiguration represents a declarative configuration of the NetworkSpec type for use // with apply. 
type NetworkSpecApplyConfiguration struct { - Topology *userdefinednetworkv1.NetworkTopology `json:"topology,omitempty"` - Layer3 *Layer3ConfigApplyConfiguration `json:"layer3,omitempty"` - Layer2 *Layer2ConfigApplyConfiguration `json:"layer2,omitempty"` - Localnet *LocalnetConfigApplyConfiguration `json:"localnet,omitempty"` + Topology *userdefinednetworkv1.NetworkTopology `json:"topology,omitempty"` + Layer3 *Layer3ConfigApplyConfiguration `json:"layer3,omitempty"` + Layer2 *Layer2ConfigApplyConfiguration `json:"layer2,omitempty"` + Localnet *LocalnetConfigApplyConfiguration `json:"localnet,omitempty"` + Transport *userdefinednetworkv1.TransportOption `json:"transport,omitempty"` + NoOverlayOptions *NoOverlayOptionsApplyConfiguration `json:"noOverlayOptions,omitempty"` } // NetworkSpecApplyConfiguration constructs a declarative configuration of the NetworkSpec type for use with @@ -67,3 +69,19 @@ func (b *NetworkSpecApplyConfiguration) WithLocalnet(value *LocalnetConfigApplyC b.Localnet = value return b } + +// WithTransport sets the Transport field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Transport field is set to the value of the last call. +func (b *NetworkSpecApplyConfiguration) WithTransport(value userdefinednetworkv1.TransportOption) *NetworkSpecApplyConfiguration { + b.Transport = &value + return b +} + +// WithNoOverlayOptions sets the NoOverlayOptions field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the NoOverlayOptions field is set to the value of the last call. +func (b *NetworkSpecApplyConfiguration) WithNoOverlayOptions(value *NoOverlayOptionsApplyConfiguration) *NetworkSpecApplyConfiguration { + b.NoOverlayOptions = value + return b +} diff --git a/go-controller/pkg/crd/userdefinednetwork/v1/apis/applyconfiguration/userdefinednetwork/v1/nooverlayoptions.go b/go-controller/pkg/crd/userdefinednetwork/v1/apis/applyconfiguration/userdefinednetwork/v1/nooverlayoptions.go new file mode 100644 index 0000000000..eb91057663 --- /dev/null +++ b/go-controller/pkg/crd/userdefinednetwork/v1/apis/applyconfiguration/userdefinednetwork/v1/nooverlayoptions.go @@ -0,0 +1,51 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1 + +import ( + userdefinednetworkv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1" +) + +// NoOverlayOptionsApplyConfiguration represents a declarative configuration of the NoOverlayOptions type for use +// with apply. 
+type NoOverlayOptionsApplyConfiguration struct { + OutboundSNAT *userdefinednetworkv1.SNATOption `json:"outboundSNAT,omitempty"` + Routing *userdefinednetworkv1.RoutingOption `json:"routing,omitempty"` +} + +// NoOverlayOptionsApplyConfiguration constructs a declarative configuration of the NoOverlayOptions type for use with +// apply. +func NoOverlayOptions() *NoOverlayOptionsApplyConfiguration { + return &NoOverlayOptionsApplyConfiguration{} +} + +// WithOutboundSNAT sets the OutboundSNAT field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the OutboundSNAT field is set to the value of the last call. +func (b *NoOverlayOptionsApplyConfiguration) WithOutboundSNAT(value userdefinednetworkv1.SNATOption) *NoOverlayOptionsApplyConfiguration { + b.OutboundSNAT = &value + return b +} + +// WithRouting sets the Routing field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Routing field is set to the value of the last call. +func (b *NoOverlayOptionsApplyConfiguration) WithRouting(value userdefinednetworkv1.RoutingOption) *NoOverlayOptionsApplyConfiguration { + b.Routing = &value + return b +} diff --git a/go-controller/pkg/crd/userdefinednetwork/v1/apis/applyconfiguration/utils.go b/go-controller/pkg/crd/userdefinednetwork/v1/apis/applyconfiguration/utils.go index 3d3fda33cd..c28c93a108 100644 --- a/go-controller/pkg/crd/userdefinednetwork/v1/apis/applyconfiguration/utils.go +++ b/go-controller/pkg/crd/userdefinednetwork/v1/apis/applyconfiguration/utils.go @@ -51,6 +51,8 @@ func ForKind(kind schema.GroupVersionKind) interface{} { return &userdefinednetworkv1.LocalnetConfigApplyConfiguration{} case v1.SchemeGroupVersion.WithKind("NetworkSpec"): return &userdefinednetworkv1.NetworkSpecApplyConfiguration{} + case v1.SchemeGroupVersion.WithKind("NoOverlayOptions"): + return &userdefinednetworkv1.NoOverlayOptionsApplyConfiguration{} case v1.SchemeGroupVersion.WithKind("UserDefinedNetwork"): return &userdefinednetworkv1.UserDefinedNetworkApplyConfiguration{} case v1.SchemeGroupVersion.WithKind("UserDefinedNetworkSpec"): diff --git a/go-controller/pkg/crd/userdefinednetwork/v1/cudn.go b/go-controller/pkg/crd/userdefinednetwork/v1/cudn.go index ace64a06c9..6c3510a979 100644 --- a/go-controller/pkg/crd/userdefinednetwork/v1/cudn.go +++ b/go-controller/pkg/crd/userdefinednetwork/v1/cudn.go @@ -33,6 +33,9 @@ type ClusterUserDefinedNetworkSpec struct { // +kubebuilder:validation:XValidation:rule="has(self.topology) && self.topology == 'Layer3' ? has(self.layer3): !has(self.layer3)", message="spec.layer3 is required when topology is Layer3 and forbidden otherwise" // +kubebuilder:validation:XValidation:rule="has(self.topology) && self.topology == 'Layer2' ? has(self.layer2): !has(self.layer2)", message="spec.layer2 is required when topology is Layer2 and forbidden otherwise" // +kubebuilder:validation:XValidation:rule="has(self.topology) && self.topology == 'Localnet' ? 
has(self.localnet): !has(self.localnet)", message="spec.localnet is required when topology is Localnet and forbidden otherwise" + // +kubebuilder:validation:XValidation:rule="!has(self.transport) || self.transport != 'NoOverlay' || (self.topology == 'Layer3' && has(self.layer3) && self.layer3.role == 'Primary')", message="transport 'NoOverlay' is only supported for Layer3 primary networks" + // +kubebuilder:validation:XValidation:rule="!has(self.transport) || self.transport != 'NoOverlay' || has(self.noOverlayOptions)", message="noOverlayOptions is required when transport is 'NoOverlay'" + // +kubebuilder:validation:XValidation:rule="self.transport == 'NoOverlay' || !has(self.noOverlayOptions)", message="noOverlayOptions is forbidden when transport is not 'NoOverlay'" // +kubebuilder:validation:XValidation:rule="self == oldSelf", message="Network spec is immutable" // +required Network NetworkSpec `json:"network"` @@ -65,6 +68,19 @@ type NetworkSpec struct { // Localnet is the Localnet topology configuration. // +optional Localnet *LocalnetConfig `json:"localnet,omitempty"` + + // Transport describes the transport technology for pod-to-pod traffic. + // Allowed values are "NoOverlay" and "Geneve". + // - "NoOverlay": The network operates in no-overlay mode. + // - "Geneve": The network uses Geneve overlay. + // When omitted, the default behaviour is Geneve. + // +kubebuilder:validation:Enum=NoOverlay;Geneve + // +optional + Transport TransportOption `json:"transport,omitempty"` + // NoOverlayOptions contains configuration for no-overlay mode. + // This is only allowed when Transport is "NoOverlay". + // +optional + NoOverlayOptions *NoOverlayOptions `json:"noOverlayOptions,omitempty"` } // ClusterUserDefinedNetworkStatus contains the observed status of the ClusterUserDefinedNetwork. @@ -218,3 +234,30 @@ type VLANConfig struct { // +optional Access *AccessVLANConfig `json:"access"` } + +type TransportOption string +type SNATOption string +type RoutingOption string + +const ( + TransportOptionNoOverlay TransportOption = "NoOverlay" + TransportOptionGeneve TransportOption = "Geneve" + + SNATEnabled SNATOption = "Enabled" + SNATDisabled SNATOption = "Disabled" + + RoutingManaged RoutingOption = "Managed" + RoutingUnmanaged RoutingOption = "Unmanaged" +) + +// NoOverlayOptions contains configuration options for networks operating in no-overlay mode. +type NoOverlayOptions struct { + // OutboundSNAT defines the SNAT behavior for outbound traffic from pods. + // +kubebuilder:validation:Enum=Enabled;Disabled + // +required + OutboundSNAT SNATOption `json:"outboundSNAT"` + // Routing specifies whether the pod network routing is managed by OVN-Kubernetes or users. 
+ // +kubebuilder:validation:Enum=Managed;Unmanaged + // +required + Routing RoutingOption `json:"routing"` +} diff --git a/go-controller/pkg/crd/userdefinednetwork/v1/zz_generated.deepcopy.go b/go-controller/pkg/crd/userdefinednetwork/v1/zz_generated.deepcopy.go index 0d0809a677..ee6487a6c4 100644 --- a/go-controller/pkg/crd/userdefinednetwork/v1/zz_generated.deepcopy.go +++ b/go-controller/pkg/crd/userdefinednetwork/v1/zz_generated.deepcopy.go @@ -341,6 +341,11 @@ func (in *NetworkSpec) DeepCopyInto(out *NetworkSpec) { *out = new(LocalnetConfig) (*in).DeepCopyInto(*out) } + if in.NoOverlayOptions != nil { + in, out := &in.NoOverlayOptions, &out.NoOverlayOptions + *out = new(NoOverlayOptions) + **out = **in + } return } @@ -354,6 +359,22 @@ func (in *NetworkSpec) DeepCopy() *NetworkSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NoOverlayOptions) DeepCopyInto(out *NoOverlayOptions) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NoOverlayOptions. +func (in *NoOverlayOptions) DeepCopy() *NoOverlayOptions { + if in == nil { + return nil + } + out := new(NoOverlayOptions) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *UserDefinedNetwork) DeepCopyInto(out *UserDefinedNetwork) { *out = *in diff --git a/go-controller/pkg/factory/factory.go b/go-controller/pkg/factory/factory.go index 4c7b0c6618..f14d0eafc6 100644 --- a/go-controller/pkg/factory/factory.go +++ b/go-controller/pkg/factory/factory.go @@ -63,6 +63,10 @@ import ( adminbasedpolicyscheme "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/scheme" adminbasedpolicyinformerfactory "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions" adminpolicybasedrouteinformer "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/adminpolicybasedroute/v1" + networkconnectapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/clusternetworkconnect/v1" + networkconnectscheme "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/clusternetworkconnect/v1/apis/clientset/versioned/scheme" + networkconnectinformerfactory "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/clusternetworkconnect/v1/apis/informers/externalversions" + networkconnectinformer "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/clusternetworkconnect/v1/apis/informers/externalversions/clusternetworkconnect/v1" egressfirewallapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1" egressfirewallscheme "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1/apis/clientset/versioned/scheme" egressfirewallinformerfactory "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1/apis/informers/externalversions" @@ -121,6 +125,7 @@ type WatchFactory struct { ipamClaimsFactory ipamclaimsfactory.SharedInformerFactory nadFactory nadinformerfactory.SharedInformerFactory udnFactory userdefinednetworkapiinformerfactory.SharedInformerFactory + cncFactory networkconnectinformerfactory.SharedInformerFactory raFactory routeadvertisementsinformerfactory.SharedInformerFactory frrFactory frrinformerfactory.SharedInformerFactory networkQoSFactory 
networkqosinformerfactory.SharedInformerFactory @@ -149,6 +154,7 @@ func (wf *WatchFactory) ShallowClone() *WatchFactory { ipamClaimsFactory: wf.ipamClaimsFactory, nadFactory: wf.nadFactory, udnFactory: wf.udnFactory, + cncFactory: wf.cncFactory, raFactory: wf.raFactory, frrFactory: wf.frrFactory, networkQoSFactory: wf.networkQoSFactory, @@ -247,7 +253,7 @@ var ( UserDefinedNetworkType reflect.Type = reflect.TypeOf(&userdefinednetworkapi.UserDefinedNetwork{}) ClusterUserDefinedNetworkType reflect.Type = reflect.TypeOf(&userdefinednetworkapi.ClusterUserDefinedNetwork{}) NetworkQoSType reflect.Type = reflect.TypeOf(&networkqosapi.NetworkQoS{}) - + ClusterNetworkConnectType reflect.Type = reflect.TypeOf(&networkconnectapi.ClusterNetworkConnect{}) // Resource types used in ovnk node NamespaceExGwType reflect.Type = reflect.TypeOf(&namespaceExGw{}) EndpointSliceForStaleConntrackRemovalType reflect.Type = reflect.TypeOf(&endpointSliceForStaleConntrackRemoval{}) @@ -620,6 +626,13 @@ func (wf *WatchFactory) Start() error { } } + if wf.cncFactory != nil { + wf.cncFactory.Start(wf.stopChan) + if err := waitForCacheSyncWithTimeout(wf.cncFactory, wf.stopChan); err != nil { + return err + } + } + if wf.raFactory != nil { wf.raFactory.Start(wf.stopChan) if err := waitForCacheSyncWithTimeout(wf.raFactory, wf.stopChan); err != nil { @@ -676,6 +689,10 @@ func (wf *WatchFactory) Stop() { wf.udnFactory.Shutdown() } + if wf.cncFactory != nil { + wf.cncFactory.Shutdown() + } + if wf.raFactory != nil { wf.raFactory.Shutdown() } @@ -900,6 +917,9 @@ func NewClusterManagerWatchFactory(ovnClientset *util.OVNClusterManagerClientset if err := userdefinednetworkapi.AddToScheme(userdefinednetworkscheme.Scheme); err != nil { return nil, err } + if err := networkconnectapi.AddToScheme(networkconnectscheme.Scheme); err != nil { + return nil, err + } if err := routeadvertisementsapi.AddToScheme(routeadvertisementsscheme.Scheme); err != nil { return nil, err } @@ -1048,6 +1068,19 @@ func NewClusterManagerWatchFactory(ovnClientset *util.OVNClusterManagerClientset wf.iFactory.Core().V1().Pods().Informer() } + if util.IsNetworkConnectEnabled() { + wf.cncFactory = networkconnectinformerfactory.NewSharedInformerFactory(ovnClientset.NetworkConnectClient, resyncInterval) + wf.informers[ClusterNetworkConnectType], err = newQueuedInformer(eventQueueSize, + ClusterNetworkConnectType, + wf.cncFactory.K8s().V1().ClusterNetworkConnects().Informer(), + wf.stopChan, minNumEventQueues) + if err != nil { + return nil, err + } + // make sure namespace informer cache is initialized and synced on Start(). 
+ wf.iFactory.Core().V1().Namespaces().Informer() + } + if util.IsRouteAdvertisementsEnabled() { wf.informers[NamespaceType], err = newQueuedInformer(eventQueueSize, NamespaceType, wf.iFactory.Core().V1().Namespaces().Informer(), wf.stopChan, defaultNumEventQueues) @@ -1152,6 +1185,10 @@ func getObjectMeta(objType reflect.Type, obj interface{}) (*metav1.ObjectMeta, e if cudn, ok := obj.(*userdefinednetworkapi.ClusterUserDefinedNetwork); ok { return &cudn.ObjectMeta, nil } + case ClusterNetworkConnectType: + if cnc, ok := obj.(*networkconnectapi.ClusterNetworkConnect); ok { + return &cnc.ObjectMeta, nil + } case NetworkQoSType: if networkQoS, ok := obj.(*networkqosapi.NetworkQoS); ok { return &networkQoS.ObjectMeta, nil @@ -1767,6 +1804,10 @@ func (wf *WatchFactory) ClusterUserDefinedNetworkInformer() userdefinednetworkin return wf.udnFactory.K8s().V1().ClusterUserDefinedNetworks() } +func (wf *WatchFactory) ClusterNetworkConnectInformer() networkconnectinformer.ClusterNetworkConnectInformer { + return wf.cncFactory.K8s().V1().ClusterNetworkConnects() +} + func (wf *WatchFactory) DNSNameResolverInformer() ocpnetworkinformerv1alpha1.DNSNameResolverInformer { return wf.dnsFactory.Network().V1alpha1().DNSNameResolvers() } diff --git a/go-controller/pkg/factory/handler.go b/go-controller/pkg/factory/handler.go index 50563b3278..c648e7f4bc 100644 --- a/go-controller/pkg/factory/handler.go +++ b/go-controller/pkg/factory/handler.go @@ -22,6 +22,7 @@ import ( "k8s.io/klog/v2" anplister "sigs.k8s.io/network-policy-api/pkg/client/listers/apis/v1alpha1" + networkconnectlister "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/clusternetworkconnect/v1/apis/listers/clusternetworkconnect/v1" egressfirewalllister "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1/apis/listers/egressfirewall/v1" egressiplister "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1/apis/listers/egressip/v1" egressqoslister "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressqos/v1/apis/listers/egressqos/v1" @@ -509,6 +510,8 @@ func newInformerLister(oType reflect.Type, sharedInformer cache.SharedIndexInfor return userdefinednetworklister.NewUserDefinedNetworkLister(sharedInformer.GetIndexer()), nil case ClusterUserDefinedNetworkType: return userdefinednetworklister.NewClusterUserDefinedNetworkLister(sharedInformer.GetIndexer()), nil + case ClusterNetworkConnectType: + return networkconnectlister.NewClusterNetworkConnectLister(sharedInformer.GetIndexer()), nil case NetworkQoSType: return networkqoslister.NewNetworkQoSLister(sharedInformer.GetIndexer()), nil } diff --git a/go-controller/pkg/kube/mocks/InterfaceOVN.go b/go-controller/pkg/kube/mocks/InterfaceOVN.go index 4d5c8e56b6..9b95af44ac 100644 --- a/go-controller/pkg/kube/mocks/InterfaceOVN.go +++ b/go-controller/pkg/kube/mocks/InterfaceOVN.go @@ -307,24 +307,6 @@ func (_m *InterfaceOVN) PatchNode(old *apicorev1.Node, new *apicorev1.Node) erro return r0 } -// RemoveTaintFromNode provides a mock function with given fields: nodeName, taint -func (_m *InterfaceOVN) RemoveTaintFromNode(nodeName string, taint *apicorev1.Taint) error { - ret := _m.Called(nodeName, taint) - - if len(ret) == 0 { - panic("no return value specified for RemoveTaintFromNode") - } - - var r0 error - if rf, ok := ret.Get(0).(func(string, *apicorev1.Taint) error); ok { - r0 = rf(nodeName, taint) - } else { - r0 = ret.Error(0) - } - - return r0 -} - // SetAnnotationsOnNamespace provides a mock function with given fields: namespaceName, annotations 
func (_m *InterfaceOVN) SetAnnotationsOnNamespace(namespaceName string, annotations map[string]interface{}) error { ret := _m.Called(namespaceName, annotations) diff --git a/go-controller/pkg/kubevirt/dhcp.go b/go-controller/pkg/kubevirt/dhcp.go index 51cb600ebd..414adaf0d2 100644 --- a/go-controller/pkg/kubevirt/dhcp.go +++ b/go-controller/pkg/kubevirt/dhcp.go @@ -71,19 +71,6 @@ func WithIPv6DNSServer(dnsServer string) func(*dhcpConfigs) { } } -func EnsureDHCPOptionsForMigratablePod(controllerName string, nbClient libovsdbclient.Client, watchFactory *factory.WatchFactory, pod *corev1.Pod, ips []*net.IPNet, lsp *nbdb.LogicalSwitchPort) error { - dnsServerIPv4, dnsServerIPv6, err := RetrieveDNSServiceClusterIPs(watchFactory) - if err != nil { - return fmt.Errorf("failed retrieving dns service cluster ip: %v", err) - } - - return EnsureDHCPOptionsForLSP(controllerName, nbClient, pod, ips, lsp, - WithIPv4Router(ARPProxyIPv4), - WithIPv4DNSServer(dnsServerIPv4), - WithIPv6DNSServer(dnsServerIPv6), - ) -} - func EnsureDHCPOptionsForLSP(controllerName string, nbClient libovsdbclient.Client, pod *corev1.Pod, ips []*net.IPNet, lsp *nbdb.LogicalSwitchPort, opts ...DHCPConfigsOpt) error { vmKey := ExtractVMNameFromPod(pod) if vmKey == nil { diff --git a/go-controller/pkg/node/default_node_network_controller.go b/go-controller/pkg/node/default_node_network_controller.go index edc1744be5..a9f848171c 100644 --- a/go-controller/pkg/node/default_node_network_controller.go +++ b/go-controller/pkg/node/default_node_network_controller.go @@ -1273,7 +1273,12 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error { // Stop gracefully stops the controller // deleteLogicalEntities will never be true for default network func (nc *DefaultNodeNetworkController) Stop() { + if nc.stopChan == nil { + klog.Infof("Default node network controller is already stopped") + return + } close(nc.stopChan) + nc.stopChan = nil nc.wg.Wait() } diff --git a/go-controller/pkg/node/default_node_network_controller_test.go b/go-controller/pkg/node/default_node_network_controller_test.go index 6ecd5bdeb6..ccfa18af0e 100644 --- a/go-controller/pkg/node/default_node_network_controller_test.go +++ b/go-controller/pkg/node/default_node_network_controller_test.go @@ -1596,7 +1596,7 @@ add element inet ovn-kubernetes remote-node-ips-v6 { 2002:db8:1::4 } }) Context("when gateway interface is set to derive-from-mgmt-port", func() { - It("should resolve gateway interface from PCI address successfully", func() { + ovntest.OnSupportedPlatformsIt("should resolve gateway interface from PCI address successfully", func() { // Mock getManagementPortNetDev to return the management port device netlinkOpsMock.On("LinkByName", mgmtPortNetdev).Return(netlinkLinkMock, nil) netlinkLinkMock.On("Attrs").Return(&netlink.LinkAttrs{ @@ -1636,7 +1636,7 @@ add element inet ovn-kubernetes remote-node-ips-v6 { 2002:db8:1::4 } Expect(selectedNetdev).To(Equal(expectedGatewayIntf)) }) - It("should return error when no network devices found for PCI address", func() { + ovntest.OnSupportedPlatformsIt("should return error when no network devices found for PCI address", func() { // Mock getManagementPortNetDev to return the management port device netlinkOpsMock.On("LinkByName", mgmtPortNetdev).Return(netlinkLinkMock, nil) netlinkLinkMock.On("Attrs").Return(&netlink.LinkAttrs{ @@ -1670,7 +1670,7 @@ add element inet ovn-kubernetes remote-node-ips-v6 { 2002:db8:1::4 } Expect(netdevs).To(BeEmpty()) }) - It("should return error when GetPciFromNetDevice 
fails", func() { + ovntest.OnSupportedPlatformsIt("should return error when GetPciFromNetDevice fails", func() { // Mock getManagementPortNetDev to return the management port device netlinkOpsMock.On("LinkByName", mgmtPortNetdev).Return(netlinkLinkMock, nil) netlinkLinkMock.On("Attrs").Return(&netlink.LinkAttrs{ @@ -1689,7 +1689,7 @@ add element inet ovn-kubernetes remote-node-ips-v6 { 2002:db8:1::4 } Expect(err.Error()).To(ContainSubstring("failed to get PCI address")) }) - It("should return error when GetPfPciFromVfPci fails", func() { + ovntest.OnSupportedPlatformsIt("should return error when GetPfPciFromVfPci fails", func() { // Mock getManagementPortNetDev to return the management port device netlinkOpsMock.On("LinkByName", mgmtPortNetdev).Return(netlinkLinkMock, nil) netlinkLinkMock.On("Attrs").Return(&netlink.LinkAttrs{ @@ -1714,7 +1714,7 @@ add element inet ovn-kubernetes remote-node-ips-v6 { 2002:db8:1::4 } Expect(err.Error()).To(ContainSubstring("failed to get PF PCI address")) }) - It("should return error when GetNetDevicesFromPci fails", func() { + ovntest.OnSupportedPlatformsIt("should return error when GetNetDevicesFromPci fails", func() { // Mock getManagementPortNetDev to return the management port device netlinkOpsMock.On("LinkByName", mgmtPortNetdev).Return(netlinkLinkMock, nil) netlinkLinkMock.On("Attrs").Return(&netlink.LinkAttrs{ diff --git a/go-controller/pkg/node/gateway_udn_test.go b/go-controller/pkg/node/gateway_udn_test.go index c302e33bf3..85eddb0cef 100644 --- a/go-controller/pkg/node/gateway_udn_test.go +++ b/go-controller/pkg/node/gateway_udn_test.go @@ -1715,7 +1715,7 @@ var _ = Describe("UserDefinedNetworkGateway", func() { Expect(fexec.CalledMatchesExpected()).To(BeTrue(), fexec.ErrorDesc) }) - It("should sync node port watcher successfully if a namespaces network is invalid", func() { + ovntest.OnSupportedPlatformsIt("should sync node port watcher successfully if a namespaces network is invalid", func() { // create new gateway, add ns with primary UDN, pod, expose pod via Node port service, delete pod, delete udn, ensure sync should succeeds namespace := util.NewNamespace("udn") config.OVNKubernetesFeature.EnableMultiNetwork = true @@ -1745,6 +1745,9 @@ var _ = Describe("UserDefinedNetworkGateway", func() { }) func TestConstructUDNVRFIPRules(t *testing.T) { + if ovntest.NoRoot() { + t.Skip("Test requires root privileges") + } type testRule struct { priority int family int @@ -1935,6 +1938,9 @@ func TestConstructUDNVRFIPRules(t *testing.T) { } func TestConstructUDNVRFIPRulesPodNetworkAdvertisedToDefaultVRF(t *testing.T) { + if ovntest.NoRoot() { + t.Skip("Test requires root privileges") + } type testRule struct { priority int family int @@ -2118,6 +2124,9 @@ func TestConstructUDNVRFIPRulesPodNetworkAdvertisedToDefaultVRF(t *testing.T) { } func TestConstructUDNVRFIPRulesPodNetworkAdvertisedToNonDefaultVRF(t *testing.T) { + if ovntest.NoRoot() { + t.Skip("Test requires root privileges") + } type testRule struct { priority int family int diff --git a/go-controller/pkg/node/user_defined_node_network_controller.go b/go-controller/pkg/node/user_defined_node_network_controller.go index 1c1a10b3ee..9e88801082 100644 --- a/go-controller/pkg/node/user_defined_node_network_controller.go +++ b/go-controller/pkg/node/user_defined_node_network_controller.go @@ -92,8 +92,13 @@ func (nc *UserDefinedNodeNetworkController) Start(_ context.Context) error { // Stop gracefully stops the controller func (nc *UserDefinedNodeNetworkController) Stop() { + if nc.stopChan == nil { + 
klog.Infof("UDN node network controller for network %s is already stopped", nc.GetNetworkName()) + return + } klog.Infof("Stopping UDN node network controller for network %s", nc.GetNetworkName()) close(nc.stopChan) + nc.stopChan = nil nc.wg.Wait() if nc.podHandler != nil { diff --git a/go-controller/pkg/node/user_defined_node_network_controller_test.go b/go-controller/pkg/node/user_defined_node_network_controller_test.go index 73aa24ccd5..e014bab9fe 100644 --- a/go-controller/pkg/node/user_defined_node_network_controller_test.go +++ b/go-controller/pkg/node/user_defined_node_network_controller_test.go @@ -68,7 +68,7 @@ var _ = Describe("UserDefinedNodeNetworkController", func() { ovntest.DelLink("breth0") }) - It("ensure UDNGateway is not invoked when feature gate is OFF", func() { + ovntest.OnSupportedPlatformsIt("ensure UDNGateway is not invoked when feature gate is OFF", func() { config.OVNKubernetesFeature.EnableNetworkSegmentation = false config.OVNKubernetesFeature.EnableMultiNetwork = true factoryMock := factoryMocks.NodeWatchFactory{} @@ -93,7 +93,7 @@ var _ = Describe("UserDefinedNodeNetworkController", func() { Expect(err).NotTo(HaveOccurred()) Expect(controller.gateway).To(BeNil()) }) - It("ensure UDNGateway is invoked for Primary UDNs when feature gate is ON", func() { + ovntest.OnSupportedPlatformsIt("ensure UDNGateway is invoked for Primary UDNs when feature gate is ON", func() { config.OVNKubernetesFeature.EnableNetworkSegmentation = true config.OVNKubernetesFeature.EnableMultiNetwork = true factoryMock := factoryMocks.NodeWatchFactory{} @@ -125,7 +125,7 @@ var _ = Describe("UserDefinedNodeNetworkController", func() { Expect(err.Error()).To(ContainSubstring("could not create management port"), err.Error()) Expect(controller.gateway).To(Not(BeNil())) }) - It("ensure UDNGateway is not invoked for Primary UDNs when feature gate is ON but network is not Primary", func() { + ovntest.OnSupportedPlatformsIt("ensure UDNGateway is not invoked for Primary UDNs when feature gate is ON but network is not Primary", func() { config.OVNKubernetesFeature.EnableNetworkSegmentation = true config.OVNKubernetesFeature.EnableMultiNetwork = true factoryMock := factoryMocks.NodeWatchFactory{} diff --git a/go-controller/pkg/ovn/base_network_controller.go b/go-controller/pkg/ovn/base_network_controller.go index c39d408048..1558ada10d 100644 --- a/go-controller/pkg/ovn/base_network_controller.go +++ b/go-controller/pkg/ovn/base_network_controller.go @@ -230,7 +230,8 @@ func (oc *BaseNetworkController) reconcile(netInfo util.NetInfo, setNodeFailed f // provided on the arguments of the method. This method returns no error and logs them // instead since once the controller NetInfo has been updated there is no point in retrying. 
func (oc *BaseNetworkController) doReconcile(reconcileRoutes, reconcilePendingPods bool, - reconcileNodes []string, setNodeFailed func(string), reconcileNamespaces []string) { + reconcileNodes []string, setNodeFailed func(string), reconcileNamespaces []string, +) { if reconcileRoutes { err := oc.routeImportManager.ReconcileNetwork(oc.GetNetworkName()) if err != nil { @@ -322,7 +323,8 @@ func getNetworkControllerName(netName string) string { // NewCommonNetworkControllerInfo creates CommonNetworkControllerInfo shared by controllers func NewCommonNetworkControllerInfo(client clientset.Interface, kube *kube.KubeOVN, wf *factory.WatchFactory, recorder record.EventRecorder, nbClient libovsdbclient.Client, sbClient libovsdbclient.Client, - podRecorder *metrics.PodRecorder, SCTPSupport, multicastSupport, svcTemplateSupport bool) (*CommonNetworkControllerInfo, error) { + podRecorder *metrics.PodRecorder, SCTPSupport, multicastSupport, svcTemplateSupport bool, +) (*CommonNetworkControllerInfo, error) { zone, err := libovsdbutil.GetNBZone(nbClient) if err != nil { return nil, fmt.Errorf("error getting NB zone name : err - %w", err) @@ -351,7 +353,8 @@ func (bnc *BaseNetworkController) GetLogicalPortName(pod *corev1.Pod, nadName st } func (bnc *BaseNetworkController) AddConfigDurationRecord(kind, namespace, name string) ( - []ovsdb.Operation, func(), time.Time, error) { + []ovsdb.Operation, func(), time.Time, error, +) { if !bnc.IsUserDefinedNetwork() { return recorders.GetConfigDurationRecorder().AddOVN(bnc.nbClient, kind, namespace, name) } @@ -518,7 +521,8 @@ func (bnc *BaseNetworkController) syncNodeClusterRouterPort(node *corev1.Node, h } func (bnc *BaseNetworkController) createNodeLogicalSwitch(nodeName string, hostSubnets []*net.IPNet, - clusterLoadBalancerGroupUUID, switchLoadBalancerGroupUUID string) error { + clusterLoadBalancerGroupUUID, switchLoadBalancerGroupUUID string, +) error { // logical router port MAC is based on IPv4 subnet if there is one, else IPv6 var nodeLRPMAC net.HardwareAddr switchName := bnc.GetNetworkScopedSwitchName(nodeName) @@ -544,8 +548,7 @@ func (bnc *BaseNetworkController) createNodeLogicalSwitch(nodeName string, hostS if utilnet.IsIPv6CIDR(hostSubnet) { v6Gateway = gwIfAddr.IP - logicalSwitch.OtherConfig["ipv6_prefix"] = - hostSubnet.IP.String() + logicalSwitch.OtherConfig["ipv6_prefix"] = hostSubnet.IP.String() } else { v4Gateway = gwIfAddr.IP excludeIPs := mgmtIfAddr.IP.String() @@ -1003,7 +1006,6 @@ func (bnc *BaseNetworkController) isLocalZoneNode(node *corev1.Node) bool { // GetNetworkRole returns the role of this controller's network for the given pod func (bnc *BaseNetworkController) GetNetworkRole(pod *corev1.Pod) (string, error) { - role, err := util.GetNetworkRole(bnc.GetNetInfo(), bnc.networkManager.GetActiveNetworkForNamespace, pod) if err != nil { if util.IsUnprocessedActiveNetworkError(err) { @@ -1161,8 +1163,8 @@ func (bnc *BaseNetworkController) newNetworkQoSController() error { } func initLoadBalancerGroups(nbClient libovsdbclient.Client, netInfo util.NetInfo) ( - clusterLoadBalancerGroupUUID, switchLoadBalancerGroupUUID, routerLoadBalancerGroupUUID string, err error) { - + clusterLoadBalancerGroupUUID, switchLoadBalancerGroupUUID, routerLoadBalancerGroupUUID string, err error, +) { loadBalancerGroupName := netInfo.GetNetworkScopedLoadBalancerGroupName(types.ClusterLBGroupName) clusterLBGroup := nbdb.LoadBalancerGroup{Name: loadBalancerGroupName} ops, err := libovsdbops.CreateOrUpdateLoadBalancerGroupOps(nbClient, nil, &clusterLBGroup) @@ -1247,3 
+1249,25 @@ func (bnc *BaseNetworkController) GetSamplingConfig() *libovsdbops.SamplingConfi } return nil } + +func (bnc *BaseNetworkController) ensureDHCP(pod *corev1.Pod, podAnnotation *util.PodAnnotation, lsp *nbdb.LogicalSwitchPort) error { + opts := []kubevirt.DHCPConfigsOpt{} + + ipv4DNSServer, ipv6DNSServer, err := kubevirt.RetrieveDNSServiceClusterIPs(bnc.watchFactory) + if err != nil { + return err + } + + ipv4Gateway, _ := util.MatchFirstIPFamily(false /*ipv4*/, podAnnotation.Gateways) + if ipv4Gateway != nil { + opts = append(opts, kubevirt.WithIPv4Router(ipv4Gateway.String())) + } + + if bnc.MTU() > 0 { + opts = append(opts, kubevirt.WithIPv4MTU(bnc.MTU())) + } + + opts = append(opts, kubevirt.WithIPv4DNSServer(ipv4DNSServer), kubevirt.WithIPv6DNSServer(ipv6DNSServer)) + + return kubevirt.EnsureDHCPOptionsForLSP(bnc.controllerName, bnc.nbClient, pod, podAnnotation.IPs, lsp, opts...) +} diff --git a/go-controller/pkg/ovn/base_network_controller_user_defined.go b/go-controller/pkg/ovn/base_network_controller_user_defined.go index da26a58350..82a80bc31c 100644 --- a/go-controller/pkg/ovn/base_network_controller_user_defined.go +++ b/go-controller/pkg/ovn/base_network_controller_user_defined.go @@ -227,7 +227,6 @@ func (bsnc *BaseUserDefinedNetworkController) DeleteUserDefinedNetworkResourceCo // ensurePodForUserDefinedNetwork tries to set up the User Defined Network for a pod. It returns nil on success and error // on failure; failure indicates the pod set up should be retried later. func (bsnc *BaseUserDefinedNetworkController) ensurePodForUserDefinedNetwork(pod *corev1.Pod, addPort bool) error { - // Try unscheduled pods later if !util.PodScheduled(pod) { return nil @@ -302,7 +301,8 @@ func (bsnc *BaseUserDefinedNetworkController) ensurePodForUserDefinedNetwork(pod } func (bsnc *BaseUserDefinedNetworkController) addLogicalPortToNetworkForNAD(pod *corev1.Pod, nadName, switchName string, - network *nadapi.NetworkSelectionElement, kubevirtLiveMigrationStatus *kubevirt.LiveMigrationStatus) error { + network *nadapi.NetworkSelectionElement, kubevirtLiveMigrationStatus *kubevirt.LiveMigrationStatus, +) error { var libovsdbExecuteTime time.Duration start := time.Now() @@ -534,8 +534,20 @@ func (bsnc *BaseUserDefinedNetworkController) hasIPAMClaim(pod *corev1.Pod, nadN var ipamClaimName string var wasPersistentIPRequested bool if bsnc.IsPrimaryNetwork() { - // primary network ipam reference claim is on the annotation - ipamClaimName, wasPersistentIPRequested = pod.Annotations[util.OvnUDNIPAMClaimName] + // 'k8s.ovn.org/primary-udn-ipamclaim' annotation has been deprecated. Maintain backward compatibility by + // using it as a fallback; when defaultNSE.IPAMClaimReference is set, it takes precedence. 
+ if desiredClaimName, isIPAMClaimRequested := pod.Annotations[util.DeprecatedOvnUDNIPAMClaimName]; isIPAMClaimRequested && desiredClaimName != "" { + wasPersistentIPRequested = true + ipamClaimName = desiredClaimName + } + defaultNSE, err := util.GetK8sPodDefaultNetworkSelection(pod) + if err != nil { + return false, err + } + if defaultNSE != nil && defaultNSE.IPAMClaimReference != "" { + wasPersistentIPRequested = true + ipamClaimName = defaultNSE.IPAMClaimReference + } } else { // secondary network the IPAM claim reference is on the network selection element nadKeys := strings.Split(nadNamespacedName, "/") @@ -920,28 +932,6 @@ func getClusterNodesDestinationBasedSNATMatch(ipFamily utilnet.IPFamily, address } } -func (bsnc *BaseUserDefinedNetworkController) ensureDHCP(pod *corev1.Pod, podAnnotation *util.PodAnnotation, lsp *nbdb.LogicalSwitchPort) error { - opts := []kubevirt.DHCPConfigsOpt{} - - ipv4DNSServer, ipv6DNSServer, err := kubevirt.RetrieveDNSServiceClusterIPs(bsnc.watchFactory) - if err != nil { - return err - } - - ipv4Gateway, _ := util.MatchFirstIPFamily(false /*ipv4*/, podAnnotation.Gateways) - if ipv4Gateway != nil { - opts = append(opts, kubevirt.WithIPv4Router(ipv4Gateway.String())) - } - - if bsnc.MTU() > 0 { - opts = append(opts, kubevirt.WithIPv4MTU(bsnc.MTU())) - } - - opts = append(opts, kubevirt.WithIPv4DNSServer(ipv4DNSServer), kubevirt.WithIPv6DNSServer(ipv6DNSServer)) - - return kubevirt.EnsureDHCPOptionsForLSP(bsnc.controllerName, bsnc.nbClient, pod, podAnnotation.IPs, lsp, opts...) -} - func (bsnc *BaseUserDefinedNetworkController) requireDHCP(pod *corev1.Pod) bool { // Configure DHCP only for kubevirt VMs layer2 primary udn with subnets return kubevirt.IsPodOwnedByVirtualMachine(pod) && @@ -951,7 +941,8 @@ func (bsnc *BaseUserDefinedNetworkController) requireDHCP(pod *corev1.Pod) bool } func (bsnc *BaseUserDefinedNetworkController) setPodLogicalSwitchPortAddressesAndEnabledField( - pod *corev1.Pod, nadName string, mac string, ips []string, enabled bool, ops []ovsdb.Operation) ([]ovsdb.Operation, *nbdb.LogicalSwitchPort, error) { + pod *corev1.Pod, nadName string, mac string, ips []string, enabled bool, ops []ovsdb.Operation, +) ([]ovsdb.Operation, *nbdb.LogicalSwitchPort, error) { lsp := &nbdb.LogicalSwitchPort{Name: bsnc.GetLogicalPortName(pod, nadName)} lsp.Enabled = ptr.To(enabled) customFields := []libovsdbops.ModelUpdateField{ @@ -987,7 +978,8 @@ func (bsnc *BaseUserDefinedNetworkController) setPodLogicalSwitchPortAddressesAn func (bsnc *BaseUserDefinedNetworkController) disableLiveMigrationSourceLSPOps( kubevirtLiveMigrationStatus *kubevirt.LiveMigrationStatus, - nadName string, ops []ovsdb.Operation) ([]ovsdb.Operation, error) { + nadName string, ops []ovsdb.Operation, +) ([]ovsdb.Operation, error) { // closing the sourcePod lsp to ensure traffic goes to the now ready targetPod. 
ops, _, err := bsnc.setPodLogicalSwitchPortAddressesAndEnabledField(kubevirtLiveMigrationStatus.SourcePod, nadName, "", nil, false, ops) return ops, err diff --git a/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go b/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go index fa9931dbba..e28c138247 100644 --- a/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go +++ b/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go @@ -27,8 +27,13 @@ type BaseLayer2UserDefinedNetworkController struct { // stop gracefully stops the controller, and delete all logical entities for this network if requested func (oc *BaseLayer2UserDefinedNetworkController) stop() { + if oc.stopChan == nil { + klog.Infof("Secondary %s network controller of network %s is already stopped", oc.TopologyType(), oc.GetNetworkName()) + return + } klog.Infof("Stop secondary %s network controller of network %s", oc.TopologyType(), oc.GetNetworkName()) close(oc.stopChan) + oc.stopChan = nil oc.cancelableCtx.Cancel() oc.wg.Wait() diff --git a/go-controller/pkg/ovn/kubevirt_test.go b/go-controller/pkg/ovn/kubevirt_test.go index c311ad657b..5f2ce57b3f 100644 --- a/go-controller/pkg/ovn/kubevirt_test.go +++ b/go-controller/pkg/ovn/kubevirt_test.go @@ -106,6 +106,8 @@ var _ = Describe("OVN Kubevirt Operations", func() { lrpNetworkIPv6 string subnetIPv4 string subnetIPv6 string + gwIPv4 string + gwIPv6 string transitSwitchPortIPv4 string transitSwitchPortIPv6 string addressIPv4 string @@ -126,6 +128,8 @@ var _ = Describe("OVN Kubevirt Operations", func() { nodeID: "4", subnetIPv4: "10.128.1.0/24", subnetIPv6: "fd11::/64", + gwIPv4: "10.128.1.1", + gwIPv6: "fd11::1", lrpNetworkIPv4: "100.64.0.4/24", lrpNetworkIPv6: "fd98::4/64", transitSwitchPortIPv4: "100.65.0.4/24", @@ -137,6 +141,8 @@ var _ = Describe("OVN Kubevirt Operations", func() { nodeID: "5", subnetIPv4: "10.128.2.0/24", subnetIPv6: "fd12::/64", + gwIPv4: "10.128.2.1", + gwIPv6: "fd12::1", lrpNetworkIPv4: "100.64.0.5/24", lrpNetworkIPv6: "fd98::5/64", transitSwitchPortIPv4: "100.65.0.5/24", @@ -148,6 +154,8 @@ var _ = Describe("OVN Kubevirt Operations", func() { nodeID: "6", subnetIPv4: "10.128.3.0/24", subnetIPv6: "fd13::/64", + gwIPv4: "10.128.3.1", + gwIPv6: "fd13::1", lrpNetworkIPv4: "100.64.0.6/24", lrpNetworkIPv6: "fd98::6/64", transitSwitchPortIPv4: "100.65.0.6/24", @@ -298,7 +306,8 @@ var _ = Describe("OVN Kubevirt Operations", func() { pod.Annotations[k] = v } } - ComposeDHCPv4Options = func(uuid, namespace string, t *testDHCPOptions) *nbdb.DHCPOptions { + ComposeDHCPv4Options = func(uuid, namespace string, nodeName string, t *testDHCPOptions) *nbdb.DHCPOptions { + GinkgoHelper() dhcpOptions := kubevirt.ComposeDHCPv4Options( t.cidr, DefaultNetworkControllerName, @@ -307,8 +316,10 @@ var _ = Describe("OVN Kubevirt Operations", func() { Name: t.hostname, }, ) + + dhcpOptions.Options["mtu"] = "1400" dhcpOptions.Options["dns_server"] = t.dns - dhcpOptions.Options["router"] = kubevirt.ARPProxyIPv4 + dhcpOptions.Options["router"] = nodeByName[nodeName].gwIPv4 dhcpOptions.UUID = uuid return dhcpOptions @@ -458,14 +469,14 @@ var _ = Describe("OVN Kubevirt Operations", func() { addressIPv4 = vmByName[t.vmName].addressIPv4 addresses = addressIPv4 mac = util.IPAddrToHWAddr(net.ParseIP(addressIPv4)).String() - nodeGWIP = kubevirt.ARPProxyIPv4 + nodeGWIP = nodeByName[t.nodeName].gwIPv4 } else if config.IPv6Mode && !config.IPv4Mode { subnetIPv6 = nodeByName[t.nodeName].subnetIPv6 subnets = subnetIPv6 addressIPv6 = 
vmByName[t.vmName].addressIPv6 addresses = addressIPv6 mac = util.IPAddrToHWAddr(net.ParseIP(addressIPv6)).String() - nodeGWIP = kubevirt.ARPProxyIPv6 + nodeGWIP = nodeByName[t.nodeName].gwIPv6 } else if config.IPv4Mode && config.IPv6Mode { subnetIPv4 = nodeByName[t.nodeName].subnetIPv4 subnetIPv6 = nodeByName[t.nodeName].subnetIPv6 @@ -474,7 +485,7 @@ var _ = Describe("OVN Kubevirt Operations", func() { addressIPv6 = vmByName[t.vmName].addressIPv6 addresses = addressIPv4 + " " + addressIPv6 mac = util.IPAddrToHWAddr(net.ParseIP(addressIPv4)).String() - nodeGWIP = kubevirt.ARPProxyIPv4 + " " + kubevirt.ARPProxyIPv6 + nodeGWIP = nodeByName[t.nodeName].gwIPv4 + " " + nodeByName[t.nodeName].gwIPv6 } labels := map[string]string{ kubevirtv1.VirtualMachineNameLabel: t.vmName, @@ -489,7 +500,6 @@ var _ = Describe("OVN Kubevirt Operations", func() { for k, v := range t.extraAnnotations { annotations[k] = v } - t.testPod = newTPod(t.nodeName, subnets, "", nodeGWIP, "virt-launcher-"+t.suffix, addresses, mac, "namespace1") t.annotations = annotations t.labels = labels @@ -642,7 +652,7 @@ var _ = Describe("OVN Kubevirt Operations", func() { } for i, d := range t.dhcpv4 { - initialDB.NBData = append(initialDB.NBData, ComposeDHCPv4Options(fmt.Sprintf("dhcpv4%d%s", i, d.hostname), t.namespace, &d)) + initialDB.NBData = append(initialDB.NBData, ComposeDHCPv4Options(fmt.Sprintf("dhcpv4%d%s", i, d.hostname), t.namespace, t.nodeName, &d)) } for i, d := range t.dhcpv6 { @@ -847,7 +857,7 @@ var _ = Describe("OVN Kubevirt Operations", func() { expectedOVN = append(expectedOVN, expectedStaticRoute) } for _, d := range t.expectedDhcpv4 { - expectedOVN = append(expectedOVN, ComposeDHCPv4Options(dhcpv4OptionsUUID+d.hostname, t.namespace, &d)) + expectedOVN = append(expectedOVN, ComposeDHCPv4Options(dhcpv4OptionsUUID+d.hostname, t.namespace, t.nodeName, &d)) } for _, d := range t.expectedDhcpv6 { diff --git a/go-controller/pkg/ovn/layer3_user_defined_network_controller.go b/go-controller/pkg/ovn/layer3_user_defined_network_controller.go index 634a8474f0..d93d827ce8 100644 --- a/go-controller/pkg/ovn/layer3_user_defined_network_controller.go +++ b/go-controller/pkg/ovn/layer3_user_defined_network_controller.go @@ -460,8 +460,13 @@ func (oc *Layer3UserDefinedNetworkController) Start(_ context.Context) error { // Stop gracefully stops the controller, and delete all logical entities for this network if requested func (oc *Layer3UserDefinedNetworkController) Stop() { + if oc.stopChan == nil { + klog.Infof("%s UDN controller of network %s is already stopped", oc.TopologyType(), oc.GetNetworkName()) + return + } klog.Infof("Stop %s UDN controller of network %s", oc.TopologyType(), oc.GetNetworkName()) close(oc.stopChan) + oc.stopChan = nil oc.cancelableCtx.Cancel() oc.wg.Wait() diff --git a/go-controller/pkg/ovn/multihoming_test.go b/go-controller/pkg/ovn/multihoming_test.go index 1d184e6f96..e41593dd77 100644 --- a/go-controller/pkg/ovn/multihoming_test.go +++ b/go-controller/pkg/ovn/multihoming_test.go @@ -471,7 +471,8 @@ func newMultiHomedPod(testPod testPod, multiHomingConfigs ...userDefinedNetInfo) for _, multiHomingConf := range multiHomingConfigs { if multiHomingConf.isPrimary { if multiHomingConf.ipamClaimReference != "" { - pod.Annotations[util.OvnUDNIPAMClaimName] = multiHomingConf.ipamClaimReference + pod.Annotations[util.DeprecatedOvnUDNIPAMClaimName] = multiHomingConf.ipamClaimReference + pod.Annotations[util.DefNetworkAnnotation] = generateDefaultNSEAnnotation(multiHomingConf) } continue // these will be 
automatically plugged in } @@ -500,6 +501,22 @@ func newMultiHomedPod(testPod testPod, multiHomingConfigs ...userDefinedNetInfo) return pod } +func generateDefaultNSEAnnotation(multiHomingConf userDefinedNetInfo) string { + nadNamePair := strings.Split(multiHomingConf.nadName, "/") + if len(nadNamePair) != 2 { + panic("failed to generate default NSE: invalid NAD name. Expected NAD name format: '<namespace>/<name>'") + } + bytes, err := json.Marshal([]nadapi.NetworkSelectionElement{{ + Namespace: nadNamePair[0], + Name: nadNamePair[1], + IPAMClaimReference: multiHomingConf.ipamClaimReference, + }}) + if err != nil { + panic(fmt.Errorf("failed to generate default NSE: %v", err)) + } + return string(bytes) +} + func dummyOVNPodNetworkAnnotations(secondaryPodInfos map[string]*udnPodInfo, multiHomingConfigs []userDefinedNetInfo) string { var ovnPodNetworksAnnotations []byte podAnnotations := map[string]podAnnotation{} diff --git a/go-controller/pkg/ovn/pods.go b/go-controller/pkg/ovn/pods.go index 80c431ef13..ebb2ed8433 100644 --- a/go-controller/pkg/ovn/pods.go +++ b/go-controller/pkg/ovn/pods.go @@ -381,12 +381,12 @@ func (oc *DefaultNetworkController) addLogicalPort(pod *corev1.Pod) (err error) _ = oc.logicalPortCache.add(pod, switchName, types.DefaultNetworkName, lsp.UUID, podAnnotation.MAC, podAnnotation.IPs) if kubevirt.IsPodLiveMigratable(pod) { - if err := kubevirt.EnsureDHCPOptionsForMigratablePod(oc.controllerName, oc.nbClient, oc.watchFactory, pod, podAnnotation.IPs, lsp); err != nil { - return err + if err := oc.ensureDHCP(pod, podAnnotation, lsp); err != nil { + return fmt.Errorf("failed configuring DHCP for default network at pod %s/%s: %w", pod.Namespace, pod.Name, err) } } - //observe the pod creation latency metric for newly created pods only + // observe the pod creation latency metric for newly created pods only if newlyCreatedPort { metrics.RecordPodCreated(pod, oc.GetNetInfo()) } diff --git a/go-controller/pkg/persistentips/allocator.go b/go-controller/pkg/persistentips/allocator.go index 2cb87e45d2..c586564a6c 100644 --- a/go-controller/pkg/persistentips/allocator.go +++ b/go-controller/pkg/persistentips/allocator.go @@ -10,9 +10,12 @@ import ( ipamclaimsapi "github.com/k8snetworkplumbingwg/ipamclaims/pkg/crd/ipamclaims/v1alpha1" ipamclaimslister "github.com/k8snetworkplumbingwg/ipamclaims/pkg/crd/ipamclaims/v1alpha1/apis/listers/ipamclaims/v1alpha1" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/klog/v2" ipam "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/mac" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" ovnktypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -35,6 +38,8 @@ type PersistentAllocations interface { FindIPAMClaim(claimName string, namespace string) (*ipamclaimsapi.IPAMClaim, error) Reconcile(oldIPAMClaim *ipamclaimsapi.IPAMClaim, newIPAMClaim *ipamclaimsapi.IPAMClaim, ipReleaser IPReleaser) error + + UpdateIPAMClaimStatus(ipamClaim *ipamclaimsapi.IPAMClaim, podAnnotation *util.PodAnnotation, podName string, allocationErr error) *ipamclaimsapi.IPAMClaim } // IPAMClaimReconciler acts on IPAMClaim events handed off by the cluster network @@ -197,3 +202,63 @@ func (icr *IPAMClaimReconciler) releaseIPs(ipamClaim *ipamclaimsapi.IPAMClaim, i klog.V(5).Infof("Released IPs: %+v", ips) return nil } + +// UpdateIPAMClaimStatus updates the IPAM claim status.
+// This method handles status updates and error logging internally. +func (icr *IPAMClaimReconciler) UpdateIPAMClaimStatus( + ipamClaim *ipamclaimsapi.IPAMClaim, + podAnnotation *util.PodAnnotation, + podName string, + allocationErr error, +) *ipamclaimsapi.IPAMClaim { + updatedClaim := ipamClaim.DeepCopy() + updatedClaim.Status.OwnerPod = &ipamclaimsapi.OwnerPod{Name: podName} + if allocationErr != nil { + updateIPAMClaimAllocationErrorStatus(updatedClaim, allocationErr) + } else { + updateIPAMClaimAllocationSuccessStatus(updatedClaim, podAnnotation) + } + return updatedClaim +} + +func updateIPAMClaimAllocationSuccessStatus( + updatedClaim *ipamclaimsapi.IPAMClaim, + podAnnotation *util.PodAnnotation, +) { + if podAnnotation != nil && len(podAnnotation.IPs) > 0 { + updatedClaim.Status.IPs = util.StringSlice(podAnnotation.IPs) + } + + setIPClaimIPsAllocatedStatusCondition(updatedClaim, metav1.ConditionTrue, "SuccessfulAllocation", "IP addresses successfully allocated") +} + +func updateIPAMClaimAllocationErrorStatus( + updatedClaim *ipamclaimsapi.IPAMClaim, + allocationErr error, +) { + updatedClaim.Status.IPs = []string{} + var reason string + + if ipam.IsErrFull(allocationErr) { + reason = "SubnetExhausted" + } else if ipam.IsErrAllocated(allocationErr) { + reason = "IPAddressConflict" + } else if errors.Is(allocationErr, mac.ErrReserveMACConflict) { + reason = "MACAddressConflict" + } else { + reason = "InternalError" + } + + setIPClaimIPsAllocatedStatusCondition(updatedClaim, metav1.ConditionFalse, reason, allocationErr.Error()) +} + +func setIPClaimIPsAllocatedStatusCondition(updatedClaim *ipamclaimsapi.IPAMClaim, status metav1.ConditionStatus, reason, message string) { + meta.SetStatusCondition(&updatedClaim.Status.Conditions, metav1.Condition{ + Type: "IPsAllocated", + Status: status, + Reason: reason, + Message: message, + ObservedGeneration: updatedClaim.Generation, + LastTransitionTime: metav1.Now(), + }) +} diff --git a/go-controller/pkg/testing/util.go b/go-controller/pkg/testing/util.go index 6c5a4ed243..0a49731b94 100644 --- a/go-controller/pkg/testing/util.go +++ b/go-controller/pkg/testing/util.go @@ -1,12 +1,17 @@ package testing import ( + "encoding/json" "fmt" nadapi "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + ktesting "k8s.io/client-go/testing" + networkconnectv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/clusternetworkconnect/v1" + networkconnectfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/clusternetworkconnect/v1/apis/clientset/versioned/fake" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" ) @@ -69,3 +74,38 @@ func GenerateNADWithConfig(name, namespace, config string) *nadapi.NetworkAttach Spec: nadapi.NetworkAttachmentDefinitionSpec{Config: config}, } } + +// AddNetworkConnectApplyReactor adds a reactor to handle Apply (patch) operations on the fake client. 
+func AddNetworkConnectApplyReactor(fakeClient *networkconnectfake.Clientset) { + fakeClient.PrependReactor("patch", "clusternetworkconnects", func(action ktesting.Action) (bool, runtime.Object, error) { + patchAction := action.(ktesting.PatchAction) + name := patchAction.GetName() + + existingObj, err := fakeClient.Tracker().Get( + networkconnectv1.SchemeGroupVersion.WithResource("clusternetworkconnects"), "", name) + if err != nil { + return true, nil, err + } + + cnc := existingObj.(*networkconnectv1.ClusterNetworkConnect) + if cnc.Annotations == nil { + cnc.Annotations = map[string]string{} + } + + var patchData map[string]interface{} + if err := json.Unmarshal(patchAction.GetPatch(), &patchData); err != nil { + return true, nil, err + } + if metadata, ok := patchData["metadata"].(map[string]interface{}); ok { + if annotations, ok := metadata["annotations"].(map[string]interface{}); ok { + for k, v := range annotations { + cnc.Annotations[k] = v.(string) + } + } + } + + _ = fakeClient.Tracker().Update( + networkconnectv1.SchemeGroupVersion.WithResource("clusternetworkconnects"), cnc, "") + return true, cnc, nil + }) +} diff --git a/go-controller/pkg/util/fake_client.go b/go-controller/pkg/util/fake_client.go index 51b624cac7..0ca981e849 100644 --- a/go-controller/pkg/util/fake_client.go +++ b/go-controller/pkg/util/fake_client.go @@ -23,6 +23,8 @@ import ( adminpolicybasedrouteapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" adminpolicybasedroutefake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake" + networkconnect "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/clusternetworkconnect/v1" + networkconnectfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/clusternetworkconnect/v1/apis/clientset/versioned/fake" egressfirewall "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1" egressfirewallfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1/apis/clientset/versioned/fake" egressip "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1" @@ -55,6 +57,7 @@ func GetOVNClientset(objects ...runtime.Object) *OVNClientset { udnObjects := []runtime.Object{} raObjects := []runtime.Object{} frrObjects := []runtime.Object{} + networkConnectObjects := []runtime.Object{} for _, object := range objects { switch object.(type) { case *egressip.EgressIP: @@ -85,6 +88,8 @@ func GetOVNClientset(objects ...runtime.Object) *OVNClientset { frrObjects = append(frrObjects, object) case *networkqos.NetworkQoS: networkQoSObjects = append(networkQoSObjects, object) + case *networkconnect.ClusterNetworkConnect: + networkConnectObjects = append(networkConnectObjects, object) default: v1Objects = append(v1Objects, object) } @@ -113,6 +118,7 @@ func GetOVNClientset(objects ...runtime.Object) *OVNClientset { RouteAdvertisementsClient: routeadvertisementsfake.NewSimpleClientset(raObjects...), FRRClient: frrfake.NewSimpleClientset(frrObjects...), NetworkQoSClient: networkqosfake.NewSimpleClientset(networkQoSObjects...), + NetworkConnectClient: networkconnectfake.NewSimpleClientset(networkConnectObjects...), } } diff --git a/go-controller/pkg/util/kube.go b/go-controller/pkg/util/kube.go index 0e030a4ae3..9c0c54e550 100644 --- a/go-controller/pkg/util/kube.go +++ b/go-controller/pkg/util/kube.go @@ -46,6 +46,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" adminpolicybasedrouteclientset 
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned" + networkconnectclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/clusternetworkconnect/v1/apis/clientset/versioned" egressfirewallclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1/apis/clientset/versioned" egressipclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1/apis/clientset/versioned" egressqosclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressqos/v1/apis/clientset/versioned" @@ -70,6 +71,7 @@ type OVNClientset struct { AdminPolicyRouteClient adminpolicybasedrouteclientset.Interface IPAMClaimsClient ipamclaimssclientset.Interface UserDefinedNetworkClient userdefinednetworkclientset.Interface + NetworkConnectClient networkconnectclientset.Interface RouteAdvertisementsClient routeadvertisementsclientset.Interface FRRClient frrclientset.Interface NetworkQoSClient networkqosclientset.Interface @@ -136,6 +138,7 @@ type OVNClusterManagerClientset struct { IPAMClaimsClient ipamclaimssclientset.Interface OCPNetworkClient ocpnetworkclientset.Interface UserDefinedNetworkClient userdefinednetworkclientset.Interface + NetworkConnectClient networkconnectclientset.Interface RouteAdvertisementsClient routeadvertisementsclientset.Interface FRRClient frrclientset.Interface NetworkQoSClient networkqosclientset.Interface @@ -224,6 +227,7 @@ func (cs *OVNClientset) GetClusterManagerClientset() *OVNClusterManagerClientset IPAMClaimsClient: cs.IPAMClaimsClient, OCPNetworkClient: cs.OCPNetworkClient, UserDefinedNetworkClient: cs.UserDefinedNetworkClient, + NetworkConnectClient: cs.NetworkConnectClient, RouteAdvertisementsClient: cs.RouteAdvertisementsClient, FRRClient: cs.FRRClient, NetworkQoSClient: cs.NetworkQoSClient, @@ -521,6 +525,11 @@ func NewOVNClientset(conf *config.KubernetesConfig) (*OVNClientset, error) { return nil, err } + networkConnectClientset, err := networkconnectclientset.NewForConfig(kconfig) + if err != nil { + return nil, err + } + routeAdvertisementsClientset, err := routeadvertisementsclientset.NewForConfig(kconfig) if err != nil { return nil, err @@ -550,6 +559,7 @@ func NewOVNClientset(conf *config.KubernetesConfig) (*OVNClientset, error) { AdminPolicyRouteClient: adminPolicyBasedRouteClientset, IPAMClaimsClient: ipamClaimsClientset, UserDefinedNetworkClient: userDefinedNetworkClientSet, + NetworkConnectClient: networkConnectClientset, RouteAdvertisementsClient: routeAdvertisementsClientset, FRRClient: frrClientset, NetworkQoSClient: networkqosClientset, diff --git a/go-controller/pkg/util/multi_network.go b/go-controller/pkg/util/multi_network.go index 4749542fa1..10bde09120 100644 --- a/go-controller/pkg/util/multi_network.go +++ b/go-controller/pkg/util/multi_network.go @@ -1543,7 +1543,6 @@ func overrideActiveNSEWithDefaultNSE(defaultNSE, activeNSE *nettypes.NetworkSele } activeNSE.IPRequest = defaultNSE.IPRequest activeNSE.MacRequest = defaultNSE.MacRequest - activeNSE.IPAMClaimReference = defaultNSE.IPAMClaimReference return nil } @@ -1586,16 +1585,32 @@ func GetPodNADToNetworkMappingWithActiveNetwork(pod *corev1.Pod, nInfo NetInfo, Name: activeNADKey.Name, } + isPersistentIPsPrimaryNetwork := nInfo.IsPrimaryNetwork() && AllowsPersistentIPs(nInfo) + var defaultNSE *nettypes.NetworkSelectionElement + if isPersistentIPsPrimaryNetwork || IsPreconfiguredUDNAddressesEnabled() { + defaultNSE, err = GetK8sPodDefaultNetworkSelection(pod) + if err != nil { + return false, nil, 
fmt.Errorf("failed getting default-network annotation for pod %q: %w", pod.Namespace+"/"+pod.Name, err) + } + } + + if isPersistentIPsPrimaryNetwork { + // 'k8s.ovn.org/primary-udn-ipamclaim' annotation has been deprecated. Maintain backward compatibility by + // using it as a fallback; when defaultNSE.IPAMClaimReference is set, it takes precedence. + if ipamClaimName, wasPersistentIPRequested := pod.Annotations[DeprecatedOvnUDNIPAMClaimName]; wasPersistentIPRequested { + activeNSE.IPAMClaimReference = ipamClaimName + } + if defaultNSE != nil && defaultNSE.IPAMClaimReference != "" { + activeNSE.IPAMClaimReference = defaultNSE.IPAMClaimReference + } + } + // Feature gate integration: EnablePreconfiguredUDNAddresses controls default network IP/MAC transfer to active network if IsPreconfiguredUDNAddressesEnabled() { // Limit the static ip and mac requests to the layer2 primary UDN when EnablePreconfiguredUDNAddresses is enabled, we // don't need to explicitly check this is primary UDN since // the "active network" concept is exactly that. if activeNetwork.TopologyType() == types.Layer2Topology { - defaultNSE, err := GetK8sPodDefaultNetworkSelection(pod) - if err != nil { - return false, nil, fmt.Errorf("failed getting default-network annotation for pod %q: %w", pod.Namespace+"/"+pod.Name, err) - } // If there are static IPs and MACs at the default NSE, override the active NSE with them if defaultNSE != nil { if err := overrideActiveNSEWithDefaultNSE(defaultNSE, activeNSE); err != nil { @@ -1605,13 +1620,6 @@ func GetPodNADToNetworkMappingWithActiveNetwork(pod *corev1.Pod, nInfo NetInfo, } } - if nInfo.IsPrimaryNetwork() && AllowsPersistentIPs(nInfo) && activeNSE.IPAMClaimReference == "" { - ipamClaimName, wasPersistentIPRequested := pod.Annotations[OvnUDNIPAMClaimName] - if wasPersistentIPRequested { - activeNSE.IPAMClaimReference = ipamClaimName - } - } - networkSelections[activeNADKey.String()] = activeNSE return true, networkSelections, nil } @@ -1640,6 +1648,10 @@ func IsNetworkSegmentationSupportEnabled() bool { return config.OVNKubernetesFeature.EnableMultiNetwork && config.OVNKubernetesFeature.EnableNetworkSegmentation } +func IsNetworkConnectEnabled() bool { + return IsNetworkSegmentationSupportEnabled() && config.OVNKubernetesFeature.EnableNetworkConnect +} + func IsRouteAdvertisementsEnabled() bool { // for now, we require multi-network to be enabled because we rely on NADs, // even for the default network diff --git a/go-controller/pkg/util/multi_network_test.go b/go-controller/pkg/util/multi_network_test.go index a2eae6f0be..e0335e0f5e 100644 --- a/go-controller/pkg/util/multi_network_test.go +++ b/go-controller/pkg/util/multi_network_test.go @@ -1107,7 +1107,103 @@ func TestGetPodNADToNetworkMappingWithActiveNetwork(t *testing.T) { }, inputPodAnnotations: map[string]string{ nadv1.NetworkAttachmentAnnot: GetNADName(namespaceName, "another-network"), - OvnUDNIPAMClaimName: "the-one-to-the-left-of-the-pony", + DefNetworkAnnotation: `[{"ipam-claim-reference":"the-one-to-the-left-of-the-pony","namespace":"ns1","name":"attachment1"}]`, + }, + expectedIsAttachmentRequested: true, + expectedNetworkSelectionElements: map[string]*nadv1.NetworkSelectionElement{ + "ns1/attachment1": { + Name: "attachment1", + Namespace: "ns1", + IPAMClaimReference: "the-one-to-the-left-of-the-pony", + }, + }, + }, + { + desc: "the network configuration for a primary layer2 UDN features allow persistent IPs, and the pod requests it." 
+ + "Using deprecated UDN IPAMClaim annotation", + // verify backward compatibility for deprecated annotation 'k8s.ovn.org/primary-udn-ipamclaim' + inputNetConf: &ovncnitypes.NetConf{ + NetConf: cnitypes.NetConf{Name: networkName}, + Topology: ovntypes.Layer2Topology, + NADName: GetNADName(namespaceName, attachmentName), + Role: ovntypes.NetworkRolePrimary, + AllowPersistentIPs: true, + }, + inputPrimaryUDNConfig: &ovncnitypes.NetConf{ + NetConf: cnitypes.NetConf{Name: networkName}, + Topology: ovntypes.Layer2Topology, + NADName: GetNADName(namespaceName, attachmentName), + Role: ovntypes.NetworkRolePrimary, + AllowPersistentIPs: true, + }, + inputPodAnnotations: map[string]string{ + nadv1.NetworkAttachmentAnnot: GetNADName(namespaceName, "another-network"), + DeprecatedOvnUDNIPAMClaimName: "the-one-to-the-left-of-the-pony", + }, + expectedIsAttachmentRequested: true, + expectedNetworkSelectionElements: map[string]*nadv1.NetworkSelectionElement{ + "ns1/attachment1": { + Name: "attachment1", + Namespace: "ns1", + IPAMClaimReference: "the-one-to-the-left-of-the-pony", + }, + }, + }, + { + desc: "the network configuration for a primary layer2 UDN features allow persistent IPs, and the pod requests it." + + "Pod has both defaultNSE with ipam-claim reference and UDNIPAMClaim annotations, specifying the same IPAMClaim CR", + // verify backward compatibility for deprecated annotation 'k8s.ovn.org/primary-udn-ipamclaim' + inputNetConf: &ovncnitypes.NetConf{ + NetConf: cnitypes.NetConf{Name: networkName}, + Topology: ovntypes.Layer2Topology, + NADName: GetNADName(namespaceName, attachmentName), + Role: ovntypes.NetworkRolePrimary, + AllowPersistentIPs: true, + }, + inputPrimaryUDNConfig: &ovncnitypes.NetConf{ + NetConf: cnitypes.NetConf{Name: networkName}, + Topology: ovntypes.Layer2Topology, + NADName: GetNADName(namespaceName, attachmentName), + Role: ovntypes.NetworkRolePrimary, + AllowPersistentIPs: true, + }, + inputPodAnnotations: map[string]string{ + nadv1.NetworkAttachmentAnnot: GetNADName(namespaceName, "another-network"), + DefNetworkAnnotation: `[{"ipam-claim-reference":"the-one-to-the-left-of-the-pony","namespace":"ns1","name":"attachment1"}]`, + DeprecatedOvnUDNIPAMClaimName: "the-one-to-the-left-of-the-pony", + }, + expectedIsAttachmentRequested: true, + expectedNetworkSelectionElements: map[string]*nadv1.NetworkSelectionElement{ + "ns1/attachment1": { + Name: "attachment1", + Namespace: "ns1", + IPAMClaimReference: "the-one-to-the-left-of-the-pony", + }, + }, + }, + { + desc: "the network configuration for a primary layer2 UDN features allow persistent IPs, and the pod requests it." + + "Pod has both defaultNSE with ipam-claim reference and UDNIPAMClaim annotations, specifying different IPAMClaim CR." 
+ + "DefaultNSE's ipam-claim reference should take precedence", + // verify backward compatibility for deprecated annotation 'k8s.ovn.org/primary-udn-ipamclaim' + inputNetConf: &ovncnitypes.NetConf{ + NetConf: cnitypes.NetConf{Name: networkName}, + Topology: ovntypes.Layer2Topology, + NADName: GetNADName(namespaceName, attachmentName), + Role: ovntypes.NetworkRolePrimary, + AllowPersistentIPs: true, + }, + inputPrimaryUDNConfig: &ovncnitypes.NetConf{ + NetConf: cnitypes.NetConf{Name: networkName}, + Topology: ovntypes.Layer2Topology, + NADName: GetNADName(namespaceName, attachmentName), + Role: ovntypes.NetworkRolePrimary, + AllowPersistentIPs: true, + }, + inputPodAnnotations: map[string]string{ + nadv1.NetworkAttachmentAnnot: GetNADName(namespaceName, "another-network"), + DefNetworkAnnotation: `[{"ipam-claim-reference":"the-one-to-the-left-of-the-pony","namespace":"ns1","name":"attachment1"}]`, + DeprecatedOvnUDNIPAMClaimName: "the-one-to-the-right-of-the-horse", }, expectedIsAttachmentRequested: true, expectedNetworkSelectionElements: map[string]*nadv1.NetworkSelectionElement{ @@ -1162,8 +1258,8 @@ func TestGetPodNADToNetworkMappingWithActiveNetwork(t *testing.T) { AllowPersistentIPs: true, }, inputPodAnnotations: map[string]string{ - nadv1.NetworkAttachmentAnnot: GetNADName(namespaceName, "another-network"), - OvnUDNIPAMClaimName: "the-one-to-the-left-of-the-pony", + nadv1.NetworkAttachmentAnnot: GetNADName(namespaceName, "another-network"), + DeprecatedOvnUDNIPAMClaimName: "the-one-to-the-left-of-the-pony", }, expectedIsAttachmentRequested: true, expectedNetworkSelectionElements: map[string]*nadv1.NetworkSelectionElement{ @@ -1176,16 +1272,18 @@ func TestGetPodNADToNetworkMappingWithActiveNetwork(t *testing.T) { { desc: "the network configuration for a primary layer2 UDN receive pod requesting IP, MAC and IPAMClaimRef on default network annotation for it", inputNetConf: &ovncnitypes.NetConf{ - NetConf: cnitypes.NetConf{Name: networkName}, - Topology: ovntypes.Layer2Topology, - NADName: GetNADName(namespaceName, attachmentName), - Role: ovntypes.NetworkRolePrimary, + NetConf: cnitypes.NetConf{Name: networkName}, + Topology: ovntypes.Layer2Topology, + NADName: GetNADName(namespaceName, attachmentName), + Role: ovntypes.NetworkRolePrimary, + AllowPersistentIPs: true, }, inputPrimaryUDNConfig: &ovncnitypes.NetConf{ - NetConf: cnitypes.NetConf{Name: networkName}, - Topology: ovntypes.Layer2Topology, - NADName: GetNADName(namespaceName, attachmentName), - Role: ovntypes.NetworkRolePrimary, + NetConf: cnitypes.NetConf{Name: networkName}, + Topology: ovntypes.Layer2Topology, + NADName: GetNADName(namespaceName, attachmentName), + Role: ovntypes.NetworkRolePrimary, + AllowPersistentIPs: true, }, inputPodAnnotations: map[string]string{ nadv1.NetworkAttachmentAnnot: GetNADName(namespaceName, "another-network"), diff --git a/go-controller/pkg/util/network_connect_annotation.go b/go-controller/pkg/util/network_connect_annotation.go new file mode 100644 index 0000000000..01f97667ca --- /dev/null +++ b/go-controller/pkg/util/network_connect_annotation.go @@ -0,0 +1,149 @@ +package util + +import ( + "context" + "encoding/json" + "fmt" + "net" + "strconv" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/klog/v2" + + networkconnectv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/clusternetworkconnect/v1" + networkconnectapply "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/clusternetworkconnect/v1/apis/applyconfiguration/clusternetworkconnect/v1" + 
networkconnectclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/clusternetworkconnect/v1/apis/clientset/versioned" +) + +const ( + ovnNetworkConnectSubnetAnnotation = "k8s.ovn.org/network-connect-subnet" + OvnConnectRouterTunnelKeyAnnotation = "k8s.ovn.org/connect-router-tunnel-key" + networkConnectSubnetAnnotationFieldManager = "ovn-kubernetes-network-connect-controller-subnet-annotation" + networkConnectRouterTunnelKeyFieldManager = "ovn-kubernetes-network-connect-controller-tunnel-key-annotation" +) + +type NetworkConnectSubnetAnnotation struct { + IPv4 string `json:"ipv4,omitempty"` + IPv6 string `json:"ipv6,omitempty"` +} + +// UpdateNetworkConnectSubnetAnnotation patches the subnet annotation for the given CNC and given allocated subnets. +// It uses the Apply method to patch the annotation and has its own manager field to avoid conflicts with other annotation patches +// like the router tunnel key annotation patch below. +func UpdateNetworkConnectSubnetAnnotation(cnc *networkconnectv1.ClusterNetworkConnect, cncClient networkconnectclientset.Interface, allocatedSubnets map[string][]*net.IPNet) error { + // Build annotation directly from allocatedSubnets (always the full list) + subnetsMap := make(map[string]NetworkConnectSubnetAnnotation) + for networkName, subnets := range allocatedSubnets { + annotation := NetworkConnectSubnetAnnotation{} + for _, subnet := range subnets { + if subnet.IP.To4() != nil { + annotation.IPv4 = subnet.String() + } else { + annotation.IPv6 = subnet.String() + } + } + subnetsMap[networkName] = annotation + } + + bytes, err := json.Marshal(subnetsMap) + if err != nil { + return fmt.Errorf("failed to marshal network connect subnet annotation: %v", err) + } + + // Apply only the annotation this controller manages, leaving other annotations untouched + applyObj := networkconnectapply.ClusterNetworkConnect(cnc.Name) + applyObj.Annotations = map[string]string{ + ovnNetworkConnectSubnetAnnotation: string(bytes), + } + _, err = cncClient.K8sV1().ClusterNetworkConnects().Apply(context.TODO(), applyObj, + metav1.ApplyOptions{FieldManager: networkConnectSubnetAnnotationFieldManager, Force: true}) + if err != nil { + return fmt.Errorf("failed to apply network connect subnet annotation: %v", err) + } + klog.V(5).Infof("Updated network connect subnet annotation for CNC %s with %d subnets", cnc.Name, len(allocatedSubnets)) + return nil +} + +// ParseNetworkConnectSubnetAnnotation parses the subnet annotation from the given CNC. +// Returns a map of owner (e.g., "layer3_1", "layer2_2") to allocated subnets. +// Returns empty map if annotation is missing or empty. 
+func ParseNetworkConnectSubnetAnnotation(cnc *networkconnectv1.ClusterNetworkConnect) (map[string][]*net.IPNet, error) { + result := make(map[string][]*net.IPNet) + + if cnc == nil || cnc.Annotations == nil { + return result, nil + } + + annotationValue, exists := cnc.Annotations[ovnNetworkConnectSubnetAnnotation] + if !exists || annotationValue == "" || annotationValue == "{}" { + return result, nil + } + + var subnetsMap map[string]NetworkConnectSubnetAnnotation + if err := json.Unmarshal([]byte(annotationValue), &subnetsMap); err != nil { + return nil, fmt.Errorf("failed to unmarshal network connect subnet annotation: %v", err) + } + + for owner, annotation := range subnetsMap { + var subnets []*net.IPNet + if annotation.IPv4 != "" { + _, ipnet, err := net.ParseCIDR(annotation.IPv4) + if err != nil { + return nil, fmt.Errorf("failed to parse IPv4 subnet %s for owner %s: %v", annotation.IPv4, owner, err) + } + subnets = append(subnets, ipnet) + } + if annotation.IPv6 != "" { + _, ipnet, err := net.ParseCIDR(annotation.IPv6) + if err != nil { + return nil, fmt.Errorf("failed to parse IPv6 subnet %s for owner %s: %v", annotation.IPv6, owner, err) + } + subnets = append(subnets, ipnet) + } + if len(subnets) > 0 { + result[owner] = subnets + } + } + + return result, nil +} + +// UpdateNetworkConnectRouterTunnelKeyAnnotation updates the router tunnel key annotation for the given CNC and given tunnel ID. +// It uses the Apply method to patch the annotation and has its own manager field to avoid conflicts with other annotation patches +// like the subnet annotation patch above. +func UpdateNetworkConnectRouterTunnelKeyAnnotation(cncName string, cncClient networkconnectclientset.Interface, tunnelID int) error { + applyObj := networkconnectapply.ClusterNetworkConnect(cncName). + WithAnnotations(map[string]string{ + OvnConnectRouterTunnelKeyAnnotation: strconv.Itoa(tunnelID), + }) + _, err := cncClient.K8sV1().ClusterNetworkConnects().Apply( + context.TODO(), + applyObj, + metav1.ApplyOptions{FieldManager: networkConnectRouterTunnelKeyFieldManager, Force: true}, + ) + if err != nil { + return fmt.Errorf("failed to apply network connect router tunnel key annotation: %v", err) + } + klog.V(5).Infof("Updated network connect router tunnel key annotation for CNC %s with tunnel ID %d", cncName, tunnelID) + return nil +} + +// ParseNetworkConnectTunnelKeyAnnotation parses the tunnel key annotation from the given CNC. +// Returns 0 if annotation is missing. 
+func ParseNetworkConnectTunnelKeyAnnotation(cnc *networkconnectv1.ClusterNetworkConnect) (int, error) { + if cnc == nil || cnc.Annotations == nil { + return 0, nil + } + + annotationValue, exists := cnc.Annotations[OvnConnectRouterTunnelKeyAnnotation] + if !exists || annotationValue == "" { + return 0, nil + } + + tunnelID, err := strconv.Atoi(annotationValue) + if err != nil { + return 0, fmt.Errorf("failed to parse tunnel key annotation: %v", err) + } + + return tunnelID, nil +} diff --git a/go-controller/pkg/util/network_connect_annotation_unit_test.go b/go-controller/pkg/util/network_connect_annotation_unit_test.go new file mode 100644 index 0000000000..3faa3e1d4a --- /dev/null +++ b/go-controller/pkg/util/network_connect_annotation_unit_test.go @@ -0,0 +1,320 @@ +package util + +import ( + "context" + "encoding/json" + "net" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + networkconnectv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/clusternetworkconnect/v1" + networkconnectfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/clusternetworkconnect/v1/apis/clientset/versioned/fake" + ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" +) + +func TestUpdateNetworkConnectSubnetAnnotation(t *testing.T) { + tests := []struct { + name string + cncName string + allocatedSubnets map[string][]*net.IPNet + expectedSubnetMap map[string]NetworkConnectSubnetAnnotation + expectError bool + }{ + { + name: "single IPv4 subnet for one network", + cncName: "test-cnc", + allocatedSubnets: map[string][]*net.IPNet{ + "network1": {ovntest.MustParseIPNet("10.0.0.0/24")}, + }, + expectedSubnetMap: map[string]NetworkConnectSubnetAnnotation{ + "network1": {IPv4: "10.0.0.0/24"}, + }, + expectError: false, + }, + { + name: "single IPv6 subnet for one network", + cncName: "test-cnc", + allocatedSubnets: map[string][]*net.IPNet{ + "network1": {ovntest.MustParseIPNet("fd00::/64")}, + }, + expectedSubnetMap: map[string]NetworkConnectSubnetAnnotation{ + "network1": {IPv6: "fd00::/64"}, + }, + expectError: false, + }, + { + name: "dual-stack subnets for one network", + cncName: "test-cnc", + allocatedSubnets: map[string][]*net.IPNet{ + "network1": { + ovntest.MustParseIPNet("10.0.0.0/24"), + ovntest.MustParseIPNet("fd00::/64"), + }, + }, + expectedSubnetMap: map[string]NetworkConnectSubnetAnnotation{ + "network1": {IPv4: "10.0.0.0/24", IPv6: "fd00::/64"}, + }, + expectError: false, + }, + { + name: "multiple networks with different subnets", + cncName: "test-cnc", + allocatedSubnets: map[string][]*net.IPNet{ + "network1": {ovntest.MustParseIPNet("10.0.1.0/24")}, + "network2": {ovntest.MustParseIPNet("10.0.2.0/24")}, + "network3": { + ovntest.MustParseIPNet("10.0.3.0/24"), + ovntest.MustParseIPNet("fd00:3::/64"), + }, + }, + expectedSubnetMap: map[string]NetworkConnectSubnetAnnotation{ + "network1": {IPv4: "10.0.1.0/24"}, + "network2": {IPv4: "10.0.2.0/24"}, + "network3": {IPv4: "10.0.3.0/24", IPv6: "fd00:3::/64"}, + }, + expectError: false, + }, + { + name: "empty allocated subnets", + cncName: "test-cnc", + allocatedSubnets: map[string][]*net.IPNet{}, + expectedSubnetMap: map[string]NetworkConnectSubnetAnnotation{}, + expectError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create a CNC object + cnc := &networkconnectv1.ClusterNetworkConnect{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "k8s.ovn.org/v1", + Kind: "ClusterNetworkConnect", 
+ }, + ObjectMeta: metav1.ObjectMeta{ + Name: tt.cncName, + }, + Spec: networkconnectv1.ClusterNetworkConnectSpec{}, + } + + // Create fake client with Apply reactor + fakeClient := networkconnectfake.NewSimpleClientset(cnc) + ovntest.AddNetworkConnectApplyReactor(fakeClient) + + // Call the function under test + err := UpdateNetworkConnectSubnetAnnotation(cnc, fakeClient, tt.allocatedSubnets) + + if tt.expectError { + require.Error(t, err) + return + } + require.NoError(t, err) + + // Get the updated CNC from the fake client + updatedCNC, err := fakeClient.K8sV1().ClusterNetworkConnects().Get( + context.Background(), tt.cncName, metav1.GetOptions{}) + require.NoError(t, err) + + // Verify the annotation was set correctly + annotationValue, ok := updatedCNC.Annotations[ovnNetworkConnectSubnetAnnotation] + if len(tt.expectedSubnetMap) == 0 { + // For empty subnets, we still expect the annotation to be set (as empty JSON object) + assert.True(t, ok, "expected annotation to be set") + assert.Equal(t, "{}", annotationValue) + } else { + assert.True(t, ok, "expected annotation to be set") + + // Parse the annotation and compare + var actualSubnetMap map[string]NetworkConnectSubnetAnnotation + err = json.Unmarshal([]byte(annotationValue), &actualSubnetMap) + require.NoError(t, err) + + assert.Equal(t, tt.expectedSubnetMap, actualSubnetMap) + } + }) + } +} + +func TestUpdateNetworkConnectRouterTunnelKeyAnnotation(t *testing.T) { + tests := []struct { + name string + cncName string + tunnelID int + expectedTunnel string + existingAnnots map[string]string + }{ + { + name: "set tunnel key on existing CNC", + cncName: "test-cnc", + tunnelID: 12345, + expectedTunnel: "12345", + }, + { + name: "set tunnel key with zero value", + cncName: "test-cnc-zero", + tunnelID: 0, + expectedTunnel: "0", + }, + { + name: "set large tunnel key", + cncName: "test-cnc-large", + tunnelID: 2147483647, // max int32 + expectedTunnel: "2147483647", + }, + { + name: "update existing tunnel key", + cncName: "test-cnc-update", + tunnelID: 99999, + expectedTunnel: "99999", + existingAnnots: map[string]string{ + OvnConnectRouterTunnelKeyAnnotation: "11111", + }, + }, + { + name: "preserve other annotations when setting tunnel key", + cncName: "test-cnc-preserve", + tunnelID: 54321, + expectedTunnel: "54321", + existingAnnots: map[string]string{ + "other-annotation": "other-value", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + cnc := &networkconnectv1.ClusterNetworkConnect{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "k8s.ovn.org/v1", + Kind: "ClusterNetworkConnect", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: tt.cncName, + Annotations: tt.existingAnnots, + }, + Spec: networkconnectv1.ClusterNetworkConnectSpec{}, + } + fakeClient := networkconnectfake.NewSimpleClientset(cnc) + ovntest.AddNetworkConnectApplyReactor(fakeClient) + + err := UpdateNetworkConnectRouterTunnelKeyAnnotation(tt.cncName, fakeClient, tt.tunnelID) + require.NoError(t, err) + + updatedCNC, err := fakeClient.K8sV1().ClusterNetworkConnects().Get( + context.Background(), tt.cncName, metav1.GetOptions{}) + require.NoError(t, err) + + // Verify the tunnel key annotation was set correctly + tunnelValue, ok := updatedCNC.Annotations[OvnConnectRouterTunnelKeyAnnotation] + assert.True(t, ok, "expected tunnel key annotation to be set") + assert.Equal(t, tt.expectedTunnel, tunnelValue) + + // Verify other annotations are preserved + for k, v := range tt.existingAnnots { + if k != OvnConnectRouterTunnelKeyAnnotation { + 
assert.Equal(t, v, updatedCNC.Annotations[k], + "expected annotation %s to be preserved", k) + } + } + }) + } +} + +func TestBothAnnotationsCanCoexist(t *testing.T) { + cncName := "test-cnc-coexist" + + // Create a CNC object + cnc := &networkconnectv1.ClusterNetworkConnect{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "k8s.ovn.org/v1", + Kind: "ClusterNetworkConnect", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: cncName, + }, + Spec: networkconnectv1.ClusterNetworkConnectSpec{}, + } + + // Create fake client with Apply reactor + fakeClient := networkconnectfake.NewSimpleClientset(cnc) + ovntest.AddNetworkConnectApplyReactor(fakeClient) + + // First, set the subnet annotation + allocatedSubnets := map[string][]*net.IPNet{ + "network1": {ovntest.MustParseIPNet("10.0.0.0/24")}, + } + err := UpdateNetworkConnectSubnetAnnotation(cnc, fakeClient, allocatedSubnets) + require.NoError(t, err) + + // Then, set the tunnel key annotation + err = UpdateNetworkConnectRouterTunnelKeyAnnotation(cncName, fakeClient, 12345) + require.NoError(t, err) + + // Get the final CNC + finalCNC, err := fakeClient.K8sV1().ClusterNetworkConnects().Get( + context.Background(), cncName, metav1.GetOptions{}) + require.NoError(t, err) + + // Verify both annotations exist + _, hasSubnetAnnot := finalCNC.Annotations[ovnNetworkConnectSubnetAnnotation] + _, hasTunnelAnnot := finalCNC.Annotations[OvnConnectRouterTunnelKeyAnnotation] + + assert.True(t, hasSubnetAnnot, "expected subnet annotation to be present") + assert.True(t, hasTunnelAnnot, "expected tunnel key annotation to be present") + + // Verify the tunnel key value + assert.Equal(t, "12345", finalCNC.Annotations[OvnConnectRouterTunnelKeyAnnotation]) + + // Verify the subnet annotation value + var subnetMap map[string]NetworkConnectSubnetAnnotation + err = json.Unmarshal([]byte(finalCNC.Annotations[ovnNetworkConnectSubnetAnnotation]), &subnetMap) + require.NoError(t, err) + assert.Equal(t, "10.0.0.0/24", subnetMap["network1"].IPv4) +} + +func TestUpdateNetworkConnectSubnetAnnotation_PreservesExistingAnnotations(t *testing.T) { + cncName := "test-cnc-preserve" + + // Create a CNC object with existing annotations + cnc := &networkconnectv1.ClusterNetworkConnect{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "k8s.ovn.org/v1", + Kind: "ClusterNetworkConnect", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: cncName, + Annotations: map[string]string{ + "existing-annotation": "existing-value", + OvnConnectRouterTunnelKeyAnnotation: "99999", + }, + }, + Spec: networkconnectv1.ClusterNetworkConnectSpec{}, + } + + // Create fake client with Apply reactor + fakeClient := networkconnectfake.NewSimpleClientset(cnc) + ovntest.AddNetworkConnectApplyReactor(fakeClient) + + // Set the subnet annotation + allocatedSubnets := map[string][]*net.IPNet{ + "network1": {ovntest.MustParseIPNet("10.0.0.0/24")}, + } + err := UpdateNetworkConnectSubnetAnnotation(cnc, fakeClient, allocatedSubnets) + require.NoError(t, err) + + // Get the updated CNC + updatedCNC, err := fakeClient.K8sV1().ClusterNetworkConnects().Get( + context.Background(), cncName, metav1.GetOptions{}) + require.NoError(t, err) + + // Verify existing annotations are preserved + assert.Equal(t, "existing-value", updatedCNC.Annotations["existing-annotation"]) + assert.Equal(t, "99999", updatedCNC.Annotations[OvnConnectRouterTunnelKeyAnnotation]) + + // Verify the new subnet annotation was added + assert.Contains(t, updatedCNC.Annotations, ovnNetworkConnectSubnetAnnotation) +} diff --git a/go-controller/pkg/util/pod_annotation.go 
b/go-controller/pkg/util/pod_annotation.go index df89537b30..614bcf473c 100644 --- a/go-controller/pkg/util/pod_annotation.go +++ b/go-controller/pkg/util/pod_annotation.go @@ -54,11 +54,13 @@ const ( OvnPodAnnotationName = "k8s.ovn.org/pod-networks" // DefNetworkAnnotation is the pod annotation for the cluster-wide active network DefNetworkAnnotation = "v1.multus-cni.io/default-network" - // OvnUDNIPAMClaimName is used for workload owners to instruct OVN-K which - // IPAMClaim will hold the allocation for the workload - OvnUDNIPAMClaimName = "k8s.ovn.org/primary-udn-ipamclaim" // UDNOpenPortsAnnotationName is the pod annotation to open default network pods on UDN pods. UDNOpenPortsAnnotationName = "k8s.ovn.org/open-default-ports" + + // DeprecatedOvnUDNIPAMClaimName is used for workload owners to instruct OVN-K which + // IPAMClaim will hold the allocation for the workload. + // Deprecated: Use 'v1.multus-cni.io/default-network' annotation instead, specifying the 'ipam-claim-reference' attribute. + DeprecatedOvnUDNIPAMClaimName = "k8s.ovn.org/primary-udn-ipamclaim" ) var ErrNoPodIPFound = errors.New("no pod IPs found") diff --git a/helm/ovn-kubernetes/README.md b/helm/ovn-kubernetes/README.md index 6cb5a8d952..95d2ef4cf9 100644 --- a/helm/ovn-kubernetes/README.md +++ b/helm/ovn-kubernetes/README.md @@ -140,15 +140,6 @@ false Controls if forwarding is allowed on OVNK controlled interfaces - - global.disableIfaceIdVer - bool -
-false
-
- - Deprecated: iface-id-ver is always enabled - global.disablePacketMtuCheck string diff --git a/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/ovnkube-control-plane.yaml b/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/ovnkube-control-plane.yaml index da8b42e48f..c5affe4382 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/ovnkube-control-plane.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/ovnkube-control-plane.yaml @@ -71,6 +71,10 @@ spec: cpu: 100m memory: 300Mi env: + {{ if .Values.global.enableCoredumps -}} + - name: GOTRACEBACK + value: "crash" + {{ end -}} - name: OVN_DAEMONSET_VERSION value: "1.1.0" - name: OVNKUBE_LOGLEVEL @@ -126,6 +130,8 @@ spec: value: {{ hasKey .Values.global "enableMultiNetwork" | ternary .Values.global.enableMultiNetwork false | quote }} - name: OVN_NETWORK_SEGMENTATION_ENABLE value: {{ default "" .Values.global.enableNetworkSegmentation | quote }} + - name: OVN_NETWORK_CONNECT_ENABLE + value: {{ default "" .Values.global.enableNetworkConnect | quote }} - name: OVN_PRE_CONF_UDN_ADDR_ENABLE value: {{ default "" .Values.global.enablePreconfiguredUDNAddresses | quote }} - name: OVN_ADVERTISED_UDN_ISOLATION_MODE diff --git a/helm/ovn-kubernetes/charts/ovnkube-db-raft/templates/statefulset.yaml b/helm/ovn-kubernetes/charts/ovnkube-db-raft/templates/statefulset.yaml index f72b880967..a4c854b4de 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-db-raft/templates/statefulset.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-db-raft/templates/statefulset.yaml @@ -236,6 +236,10 @@ spec: cpu: 100m memory: 300Mi env: + {{ if .Values.global.enableCoredumps -}} + - name: GOTRACEBACK + value: "crash" + {{ end -}} - name: OVN_DAEMONSET_VERSION value: "1.1.0" - name: OVNKUBE_LOGLEVEL diff --git a/helm/ovn-kubernetes/charts/ovnkube-identity/templates/ovnkube-identity.yaml b/helm/ovn-kubernetes/charts/ovnkube-identity/templates/ovnkube-identity.yaml index a170e3ef22..896ba6fbb4 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-identity/templates/ovnkube-identity.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-identity/templates/ovnkube-identity.yaml @@ -57,6 +57,10 @@ spec: - mountPath: /etc/webhook-cert/ name: webhook-cert env: + {{ if .Values.global.enableCoredumps -}} + - name: GOTRACEBACK + value: "crash" + {{ end -}} - name: OVN_DAEMONSET_VERSION value: "1.1.0" - name: K8S_APISERVER diff --git a/helm/ovn-kubernetes/charts/ovnkube-master/templates/deployment-ovnkube-master.yaml b/helm/ovn-kubernetes/charts/ovnkube-master/templates/deployment-ovnkube-master.yaml index b91c2ab65c..8b0fee0ac3 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-master/templates/deployment-ovnkube-master.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-master/templates/deployment-ovnkube-master.yaml @@ -175,6 +175,10 @@ spec: cpu: 100m memory: 300Mi env: + {{ if .Values.global.enableCoredumps -}} + - name: GOTRACEBACK + value: "crash" + {{ end -}} - name: OVN_DAEMONSET_VERSION value: "1.1.0" - name: OVNKUBE_LOGLEVEL diff --git a/helm/ovn-kubernetes/charts/ovnkube-node-dpu-host/templates/ovnkube-node-dpu-host.yaml b/helm/ovn-kubernetes/charts/ovnkube-node-dpu-host/templates/ovnkube-node-dpu-host.yaml index 450c2994ae..30d920689e 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-node-dpu-host/templates/ovnkube-node-dpu-host.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-node-dpu-host/templates/ovnkube-node-dpu-host.yaml @@ -102,6 +102,10 @@ spec: cpu: 100m memory: 300Mi env: + {{ if .Values.global.enableCoredumps -}} + - name: GOTRACEBACK + 
value: "crash" + {{ end -}} - name: OVN_DAEMONSET_VERSION value: "1.1.0" - name: OVNKUBE_LOGLEVEL diff --git a/helm/ovn-kubernetes/charts/ovnkube-node-dpu/templates/ovnkube-node-dpu.yaml b/helm/ovn-kubernetes/charts/ovnkube-node-dpu/templates/ovnkube-node-dpu.yaml index 0f85d5c1a1..bbcc77ffc9 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-node-dpu/templates/ovnkube-node-dpu.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-node-dpu/templates/ovnkube-node-dpu.yaml @@ -113,6 +113,10 @@ spec: cpu: 100m memory: 300Mi env: + {{ if .Values.global.enableCoredumps -}} + - name: GOTRACEBACK + value: "crash" + {{ end -}} - name: OVN_DAEMONSET_VERSION value: "1.1.0" - name: OVNKUBE_LOGLEVEL @@ -211,8 +215,6 @@ spec: value: {{ hasKey .Values.global "enableOvnKubeIdentity" | ternary .Values.global.enableOvnKubeIdentity true | quote }} - name: OVN_SSL_ENABLE value: {{ include "isSslEnabled" . | quote }} - - name: OVN_DISABLE_OVN_IFACE_ID_VER - value: {{ hasKey .Values.global "disableIfaceIdVer" | ternary .Values.global.disableIfaceIdVer false | quote }} - name: OVN_REMOTE_PROBE_INTERVAL value: {{ default 100000 .Values.global.remoteProbeInterval | quote }} - name: OVN_MONITOR_ALL diff --git a/helm/ovn-kubernetes/charts/ovnkube-node/templates/ovnkube-node.yaml b/helm/ovn-kubernetes/charts/ovnkube-node/templates/ovnkube-node.yaml index f0bd81f391..633067a453 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-node/templates/ovnkube-node.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-node/templates/ovnkube-node.yaml @@ -113,6 +113,10 @@ spec: cpu: 100m memory: 300Mi env: + {{ if .Values.global.enableCoredumps -}} + - name: GOTRACEBACK + value: "crash" + {{ end -}} - name: OVN_DAEMONSET_VERSION value: "1.1.0" - name: OVNKUBE_LOGLEVEL @@ -211,8 +215,6 @@ spec: value: {{ hasKey .Values.global "enableOvnKubeIdentity" | ternary .Values.global.enableOvnKubeIdentity true | quote }} - name: OVN_SSL_ENABLE value: {{ include "isSslEnabled" . 
| quote }} - - name: OVN_DISABLE_OVN_IFACE_ID_VER - value: {{ hasKey .Values.global "disableIfaceIdVer" | ternary .Values.global.disableIfaceIdVer false | quote }} - name: OVN_REMOTE_PROBE_INTERVAL value: {{ default 100000 .Values.global.remoteProbeInterval | quote }} - name: OVN_MONITOR_ALL @@ -229,6 +231,8 @@ spec: value: {{ hasKey .Values.global "enableMultiNetwork" | ternary .Values.global.enableMultiNetwork false | quote }} - name: OVN_NETWORK_SEGMENTATION_ENABLE value: {{ default "" .Values.global.enableNetworkSegmentation | quote }} + - name: OVN_NETWORK_CONNECT_ENABLE + value: {{ default "" .Values.global.enableNetworkConnect | quote }} - name: OVN_PRE_CONF_UDN_ADDR_ENABLE value: {{ default "" .Values.global.enablePreconfiguredUDNAddresses | quote }} - name: OVN_ADVERTISED_UDN_ISOLATION_MODE diff --git a/helm/ovn-kubernetes/charts/ovnkube-single-node-zone/templates/ovnkube-single-node-zone.yaml b/helm/ovn-kubernetes/charts/ovnkube-single-node-zone/templates/ovnkube-single-node-zone.yaml index abb2d38133..02e6aca267 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-single-node-zone/templates/ovnkube-single-node-zone.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-single-node-zone/templates/ovnkube-single-node-zone.yaml @@ -289,6 +289,10 @@ spec: cpu: 100m memory: 300Mi env: + {{ if .Values.global.enableCoredumps -}} + - name: GOTRACEBACK + value: "crash" + {{ end -}} - name: OVN_EGRESSSERVICE_ENABLE value: {{ default "" .Values.global.enableEgressService | quote }} - name: OVN_DAEMONSET_VERSION @@ -399,8 +403,6 @@ spec: value: {{ default "" .Values.global.extGatewayNetworkInterface | quote }} - name: OVN_SSL_ENABLE value: {{ include "isSslEnabled" . | quote }} - - name: OVN_DISABLE_OVN_IFACE_ID_VER - value: {{ hasKey .Values.global "disableIfaceIdVer" | ternary .Values.global.disableIfaceIdVer false | quote }} - name: OVN_REMOTE_PROBE_INTERVAL value: {{ default 100000 .Values.global.remoteProbeInterval | quote }} - name: OVN_MONITOR_ALL @@ -417,6 +419,8 @@ spec: value: {{ hasKey .Values.global "enableMultiNetwork" | ternary .Values.global.enableMultiNetwork false | quote }} - name: OVN_NETWORK_SEGMENTATION_ENABLE value: {{ default "" .Values.global.enableNetworkSegmentation | quote }} + - name: OVN_NETWORK_CONNECT_ENABLE + value: {{ default "" .Values.global.enableNetworkConnect | quote }} - name: OVN_PRE_CONF_UDN_ADDR_ENABLE value: {{ default "" .Values.global.enablePreconfiguredUDNAddresses | quote }} - name: OVN_ADVERTISED_UDN_ISOLATION_MODE diff --git a/helm/ovn-kubernetes/charts/ovnkube-zone-controller/templates/ovnkube-zone-controller.yaml b/helm/ovn-kubernetes/charts/ovnkube-zone-controller/templates/ovnkube-zone-controller.yaml index 2cb6f2b86b..137f361564 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-zone-controller/templates/ovnkube-zone-controller.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-zone-controller/templates/ovnkube-zone-controller.yaml @@ -254,6 +254,10 @@ spec: cpu: 100m memory: 300Mi env: + {{ if .Values.global.enableCoredumps -}} + - name: GOTRACEBACK + value: "crash" + {{ end -}} - name: OVN_DAEMONSET_VERSION value: "1.1.0" - name: OVNKUBE_LOGLEVEL @@ -313,6 +317,8 @@ spec: value: {{ hasKey .Values.global "enableMultiNetwork" | ternary .Values.global.enableMultiNetwork false | quote }} - name: OVN_NETWORK_SEGMENTATION_ENABLE value: {{ default "" .Values.global.enableNetworkSegmentation | quote }} + - name: OVN_NETWORK_CONNECT_ENABLE + value: {{ default "" .Values.global.enableNetworkConnect | quote }} - name: OVN_PRE_CONF_UDN_ADDR_ENABLE value: {{ default "" 
.Values.global.enablePreconfiguredUDNAddresses | quote }} - name: OVN_ADVERTISED_UDN_ISOLATION_MODE diff --git a/helm/ovn-kubernetes/values-multi-node-zone.yaml b/helm/ovn-kubernetes/values-multi-node-zone.yaml index 65a2e8ed11..5afa6f6da5 100644 --- a/helm/ovn-kubernetes/values-multi-node-zone.yaml +++ b/helm/ovn-kubernetes/values-multi-node-zone.yaml @@ -113,8 +113,6 @@ global: disableForwarding: "" # -- Disables adding openflow flows to check packets too large to be delivered to OVN due to pod MTU being lower than NIC MTU disablePacketMtuCheck: "" - # -- Deprecated: iface-id-ver is always enabled - disableIfaceIdVer: false # -- The largest number of messages per second that gets logged before drop # @default 20 aclLoggingRateLimit: 20 diff --git a/helm/ovn-kubernetes/values-no-ic.yaml b/helm/ovn-kubernetes/values-no-ic.yaml index a02d849cd9..0f54f5217b 100644 --- a/helm/ovn-kubernetes/values-no-ic.yaml +++ b/helm/ovn-kubernetes/values-no-ic.yaml @@ -105,8 +105,6 @@ global: disableForwarding: "" # -- Disables adding openflow flows to check packets too large to be delivered to OVN due to pod MTU being lower than NIC MTU disablePacketMtuCheck: "" - # -- Deprecated: iface-id-ver is always enabled - disableIfaceIdVer: false # -- Enable/disable requested-chassis option on lsp during pod creation. Must be set to true when cluster has DPUs disableRequestedchassis: false # -- The largest number of messages per second that gets logged before drop diff --git a/helm/ovn-kubernetes/values-single-node-zone.yaml b/helm/ovn-kubernetes/values-single-node-zone.yaml index 221cf45247..c3ea4521d9 100644 --- a/helm/ovn-kubernetes/values-single-node-zone.yaml +++ b/helm/ovn-kubernetes/values-single-node-zone.yaml @@ -114,8 +114,6 @@ global: disableForwarding: "" # -- Disables adding openflow flows to check packets too large to be delivered to OVN due to pod MTU being lower than NIC MTU disablePacketMtuCheck: "" - # -- Deprecated: iface-id-ver is always enabled - disableIfaceIdVer: false # -- The largest number of messages per second that gets logged before drop # @default 20 aclLoggingRateLimit: 20 diff --git a/mkdocs.yml b/mkdocs.yml index 67996ac3c8..ed3172a7a1 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -159,5 +159,6 @@ nav: - Dynamic UDN Node Allocation: okeps/okep-5552-dynamic-udn-node-allocation.md - Connecting User Defined Networks: okeps/okep-5224-connecting-udns/okep-5224-connecting-udns.md - No-Overlay Mode: okeps/okep-5259-no-overlay.md + - EVPN: okeps/okep-5088-evpn.md - Blog: - blog/index.md diff --git a/test/Makefile b/test/Makefile index 3602502471..146bddff59 100644 --- a/test/Makefile +++ b/test/Makefile @@ -7,6 +7,26 @@ PLATFORM_IPV4_SUPPORT?=false PLATFORM_IPV6_SUPPORT?=false DUALSTACK_CONVERSION?=false +# Coredump detection settings +# Directory where kind clusters store coredumps +COREDUMP_DIR?=/tmp/kind/logs/coredumps +# Processes to skip when checking for coredumps (pipe-separated for grep) +# https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5782 +SKIPPED_COREDUMPS?=zebra|bgpd|mgmtd + +# Check for coredumps and fail if any are found (excluding skipped processes) +# Usage: $(call check-coredumps) +define check-coredumps + @if [ -d "$(COREDUMP_DIR)" ]; then \ + coredumps=$$(find "$(COREDUMP_DIR)" -maxdepth 1 -type f 2>/dev/null | grep -v -E '$(SKIPPED_COREDUMPS)' || true); \ + if [ -n "$$coredumps" ]; then \ + echo "ERROR: Coredumps found:"; \ + echo "$$coredumps"; \ + exit 1; \ + fi; \ + fi +endef + .PHONY: install-kind install-kind: PLATFORM_IPV4_SUPPORT=$(PLATFORM_IPV4_SUPPORT) 
\ @@ -26,7 +46,9 @@ shard-%: DUALSTACK_CONVERSION=$(DUALSTACK_CONVERSION) \ SINGLE_NODE_CLUSTER=$(SINGLE_NODE_CLUSTER) \ ./scripts/e2e-kind.sh $@ $(WHAT) + $(call check-coredumps) +# control-plane: coredump check is handled by the test framework (wrappedTestFramework) .PHONY: control-plane control-plane: E2E_REPORT_DIR=$(E2E_REPORT_DIR) \ @@ -41,12 +63,15 @@ conformance: E2E_REPORT_DIR=$(E2E_REPORT_DIR) \ E2E_REPORT_PREFIX=$(JOB_NAME)_ \ ./scripts/conformance.sh + $(call check-coredumps) .PHONY: tools tools: ./scripts/test-ovnkube-trace.sh + $(call check-coredumps) .PHONY: traffic-flow-tests traffic-flow-tests: TRAFFIC_FLOW_TESTS=$(TRAFFIC_FLOW_TESTS) \ ./scripts/traffic-flow-tests.sh $(WHAT) + $(call check-coredumps) diff --git a/test/conformance/go.mod b/test/conformance/go.mod index a1c08527d8..c8e5e1c2fa 100644 --- a/test/conformance/go.mod +++ b/test/conformance/go.mod @@ -9,7 +9,7 @@ require ( k8s.io/apimachinery v0.34.1 k8s.io/client-go v0.34.1 sigs.k8s.io/controller-runtime v0.22.1 - sigs.k8s.io/network-policy-api v0.1.7 + sigs.k8s.io/network-policy-api v0.1.8 ) require ( diff --git a/test/conformance/go.sum b/test/conformance/go.sum index f8f1d9bbae..32cc425e5f 100644 --- a/test/conformance/go.sum +++ b/test/conformance/go.sum @@ -182,8 +182,8 @@ sigs.k8s.io/controller-runtime v0.22.1 h1:Ah1T7I+0A7ize291nJZdS1CabF/lB4E++WizgV sigs.k8s.io/controller-runtime v0.22.1/go.mod h1:FwiwRjkRPbiN+zp2QRp7wlTCzbUXxZ/D4OzuQUDwBHY= sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= -sigs.k8s.io/network-policy-api v0.1.7 h1:obY2FTEidLXVdRYu7gJ4q1RYE57pBnrpMqoE2LZgp4g= -sigs.k8s.io/network-policy-api v0.1.7/go.mod h1:QIWX6Th2h0SmCwOwa1+9Urs0W+WDJGL5rujAPUemdkk= +sigs.k8s.io/network-policy-api v0.1.8 h1:p/VY4aX6LqohGx4sH1X3jdQh6BZ/Gb+8DoQhHKC1fZQ= +sigs.k8s.io/network-policy-api v0.1.8/go.mod h1:QIWX6Th2h0SmCwOwa1+9Urs0W+WDJGL5rujAPUemdkk= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco= diff --git a/test/conformance/network_policy_v2_test.go b/test/conformance/network_policy_v2_test.go index f18bafd456..4724f03551 100644 --- a/test/conformance/network_policy_v2_test.go +++ b/test/conformance/network_policy_v2_test.go @@ -1,7 +1,6 @@ package conformance import ( - "fmt" "os" "testing" "time" @@ -20,13 +19,10 @@ import ( ) const ( - showDebug = true - shouldCleanup = true - NetworkPolicyAPIRepoURL = "https://raw.githubusercontent.com/kubernetes-sigs/network-policy-api/v0.1.5" + showDebug = true + shouldCleanup = true ) -var conformanceTestsBaseManifests = fmt.Sprintf("%s/conformance/base/manifests.yaml", NetworkPolicyAPIRepoURL) - func TestNetworkPolicyV2Conformance(t *testing.T) { t.Log("Configuring environment for network policy V2 API conformance tests") cfg, err := config.GetConfig() @@ -70,8 +66,12 @@ func TestNetworkPolicyV2Conformance(t *testing.T) { suite.SupportAdminNetworkPolicyNamedPorts, suite.SupportBaselineAdminNetworkPolicyNamedPorts, ), - BaseManifests: conformanceTestsBaseManifests, TimeoutConfig: netpolv1config.TimeoutConfig{GetTimeout: 300 * time.Second}, + // Use fixed port range for host network pods. 
+ // Should not intersect with the default ephemeral port range > 32768 + // and any ports used by the default kind cluster components. + HostNetworkPortRangeStart: 11000, + HostNetworkPortRangeEnd: 11010, }, Implementation: confv1a1.Implementation{ Organization: "ovn-org", diff --git a/test/e2e/cluster_network_connect.go b/test/e2e/cluster_network_connect.go new file mode 100644 index 0000000000..ee588986ae --- /dev/null +++ b/test/e2e/cluster_network_connect.go @@ -0,0 +1,1896 @@ +package e2e + +import ( + "context" + "encoding/json" + "fmt" + "strings" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/rand" + clientset "k8s.io/client-go/kubernetes" + e2ekubectl "k8s.io/kubernetes/test/e2e/framework/kubectl" + + "github.com/ovn-org/ovn-kubernetes/test/e2e/feature" +) + +const ( + // Annotation keys used by the CNC controller + ovnNetworkConnectSubnetAnnotation = "k8s.ovn.org/network-connect-subnet" + ovnConnectRouterTunnelKeyAnnotation = "k8s.ovn.org/connect-router-tunnel-key" +) + +// cncAnnotationSubnet represents the subnet annotation structure +type cncAnnotationSubnet struct { + IPv4 string `json:"ipv4,omitempty"` + IPv6 string `json:"ipv6,omitempty"` +} + +var _ = Describe("ClusterNetworkConnect ClusterManagerController", feature.NetworkConnect, func() { + f := wrappedTestFramework("cnc-controller") + // disable automatic namespace creation, we need to add the required UDN label + f.SkipNamespaceCreation = true + + var ( + cs clientset.Interface + ) + + const ( + cncConnectSubnetIPv4CIDR = "192.168.0.0/16" + cncConnectSubnetIPv4Prefix = 24 + // IPv6 networkPrefix must satisfy: 32 - ipv4Prefix == 128 - ipv6Prefix + // With ipv4Prefix=24: 32-24=8, so ipv6Prefix must be 128-8=120 + cncConnectSubnetIPv6CIDR = "fd00:10::/112" + cncConnectSubnetIPv6Prefix = 120 + // Layer3 UDN CIDRs with hostSubnet (IPv4: /24, IPv6: /64) + layer3UserDefinedNetworkIPv4CIDR = "172.31.0.0/16" + layer3UserDefinedNetworkIPv4HostSubnet = 24 + layer3UserDefinedNetworkIPv6CIDR = "2014:100:200::0/60" + layer3UserDefinedNetworkIPv6HostSubnet = 64 + // Layer2 UDN CIDRs + layer2UserDefinedNetworkIPv4CIDR = "10.200.0.0/16" + layer2UserDefinedNetworkIPv6CIDR = "2015:100:200::0/60" + ) + + BeforeEach(func() { + cs = f.ClientSet + }) + + // Helper to generate connectSubnets YAML based on cluster IP family support + generateConnectSubnets := func() string { + var subnets []string + if isIPv4Supported(cs) { + subnets = append(subnets, fmt.Sprintf(` - cidr: "%s" + networkPrefix: %d`, cncConnectSubnetIPv4CIDR, cncConnectSubnetIPv4Prefix)) + } + if isIPv6Supported(cs) { + subnets = append(subnets, fmt.Sprintf(` - cidr: "%s" + networkPrefix: %d`, cncConnectSubnetIPv6CIDR, cncConnectSubnetIPv6Prefix)) + } + return strings.Join(subnets, "\n") + } + + // Helper to create a namespace with UDN label + createUDNNamespace := func(baseName string, labels map[string]string) *corev1.Namespace { + if labels == nil { + labels = map[string]string{} + } + labels[RequiredUDNNamespaceLabel] = "" + ns := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: baseName + "-" + rand.String(5), + Labels: labels, + }, + } + createdNs, err := cs.CoreV1().Namespaces().Create(context.Background(), ns, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + return createdNs + } + + // Helper to generate a random CNC name + generateCNCName := func() string { + return 
fmt.Sprintf("test-cnc-%s", rand.String(5)) + } + + // Helper to create or update a CNC with CUDN and/or PUDN selectors + // Pass nil for a selector type you don't want to use, but at least one must be non-nil + // Uses kubectl apply, so can be called to update an existing CNC + createOrUpdateCNC := func(cncName string, cudnLabelSelector, pudnLabelSelector map[string]string) { + // CNC requires at least one selector (MinItems=1 on NetworkSelectors type) + Expect(cudnLabelSelector != nil || pudnLabelSelector != nil).To(BeTrue(), + "createOrUpdateCNC requires at least one selector (cudnLabelSelector or pudnLabelSelector)") + + var networkSelectors []string + + if cudnLabelSelector != nil { + cudnLabelSelectorStr := "" + for k, v := range cudnLabelSelector { + if cudnLabelSelectorStr != "" { + cudnLabelSelectorStr += "\n " + } + cudnLabelSelectorStr += fmt.Sprintf("%s: \"%s\"", k, v) + } + networkSelectors = append(networkSelectors, fmt.Sprintf(` - networkSelectionType: "ClusterUserDefinedNetworks" + clusterUserDefinedNetworkSelector: + networkSelector: + matchLabels: + %s`, cudnLabelSelectorStr)) + } + + if pudnLabelSelector != nil { + pudnLabelSelectorStr := "" + for k, v := range pudnLabelSelector { + if pudnLabelSelectorStr != "" { + pudnLabelSelectorStr += "\n " + } + pudnLabelSelectorStr += fmt.Sprintf("%s: \"%s\"", k, v) + } + networkSelectors = append(networkSelectors, fmt.Sprintf(` - networkSelectionType: "PrimaryUserDefinedNetworks" + primaryUserDefinedNetworkSelector: + namespaceSelector: + matchLabels: + %s`, pudnLabelSelectorStr)) + } + + manifest := fmt.Sprintf(` +apiVersion: k8s.ovn.org/v1 +kind: ClusterNetworkConnect +metadata: + name: %s +spec: + networkSelectors: +%s + connectSubnets: +%s + connectivity: ["PodNetwork"] +`, cncName, strings.Join(networkSelectors, "\n"), generateConnectSubnets()) + _, err := e2ekubectl.RunKubectlInput("", manifest, "apply", "-f", "-") + Expect(err).NotTo(HaveOccurred()) + } + + // Helper to generate subnets YAML based on topology and cluster IP family support + // Layer3 uses [{cidr: "...", hostSubnet: N}] format, Layer2 uses ["..."] format + generateNetworkSubnets := func(topology string) string { + if topology == "Layer3" { + var subnets []string + if isIPv4Supported(cs) { + subnets = append(subnets, fmt.Sprintf(`{cidr: "%s", hostSubnet: %d}`, layer3UserDefinedNetworkIPv4CIDR, layer3UserDefinedNetworkIPv4HostSubnet)) + } + if isIPv6Supported(cs) { + subnets = append(subnets, fmt.Sprintf(`{cidr: "%s", hostSubnet: %d}`, layer3UserDefinedNetworkIPv6CIDR, layer3UserDefinedNetworkIPv6HostSubnet)) + } + return fmt.Sprintf("[%s]", strings.Join(subnets, ",")) + } + // Layer2 format + var quotedCidrs []string + if isIPv4Supported(cs) { + quotedCidrs = append(quotedCidrs, fmt.Sprintf(`"%s"`, layer2UserDefinedNetworkIPv4CIDR)) + } + if isIPv6Supported(cs) { + quotedCidrs = append(quotedCidrs, fmt.Sprintf(`"%s"`, layer2UserDefinedNetworkIPv6CIDR)) + } + return fmt.Sprintf("[%s]", strings.Join(quotedCidrs, ",")) + } + + // Helper to create a primary CUDN with specified topology + createPrimaryCUDN := func(cudnName, topology string, labels map[string]string, targetNamespaces ...string) { + targetNs := strings.Join(targetNamespaces, ",") + labelAnnotations := "" + for k, v := range labels { + if labelAnnotations != "" { + labelAnnotations += "\n " + } + labelAnnotations += fmt.Sprintf("%s: \"%s\"", k, v) + } + topologyLower := strings.ToLower(topology) + manifest := fmt.Sprintf(` +apiVersion: k8s.ovn.org/v1 +kind: ClusterUserDefinedNetwork +metadata: + 
name: %s + labels: + %s +spec: + namespaceSelector: + matchExpressions: + - key: kubernetes.io/metadata.name + operator: In + values: [ %s ] + network: + topology: %s + %s: + role: Primary + subnets: %s +`, cudnName, labelAnnotations, targetNs, topology, topologyLower, generateNetworkSubnets(topology)) + _, err := e2ekubectl.RunKubectlInput("", manifest, "apply", "-f", "-") + Expect(err).NotTo(HaveOccurred()) + } + + // Convenience wrappers for Layer3/Layer2 CUDN creation + createLayer3PrimaryCUDN := func(cudnName string, labels map[string]string, targetNamespaces ...string) { + createPrimaryCUDN(cudnName, "Layer3", labels, targetNamespaces...) + } + createLayer2PrimaryCUDN := func(cudnName string, labels map[string]string, targetNamespaces ...string) { + createPrimaryCUDN(cudnName, "Layer2", labels, targetNamespaces...) + } + + // Helper to create a primary UDN with specified topology + createPrimaryUDN := func(namespace, udnName, topology string) { + topologyLower := strings.ToLower(topology) + manifest := fmt.Sprintf(` +apiVersion: k8s.ovn.org/v1 +kind: UserDefinedNetwork +metadata: + name: %s +spec: + topology: %s + %s: + role: Primary + subnets: %s +`, udnName, topology, topologyLower, generateNetworkSubnets(topology)) + _, err := e2ekubectl.RunKubectlInput(namespace, manifest, "apply", "-f", "-") + Expect(err).NotTo(HaveOccurred()) + } + + // Convenience wrappers for Layer3/Layer2 UDN creation + createLayer3PrimaryUDN := func(namespace, udnName string) { + createPrimaryUDN(namespace, udnName, "Layer3") + } + createLayer2PrimaryUDN := func(namespace, udnName string) { + createPrimaryUDN(namespace, udnName, "Layer2") + } + + // Helper to delete a CNC + deleteCNC := func(cncName string) { + _, _ = e2ekubectl.RunKubectl("", "delete", "clusternetworkconnect", cncName, "--ignore-not-found") + } + + // Helper to delete a CUDN + deleteCUDN := func(cudnName string) { + _, _ = e2ekubectl.RunKubectl("", "delete", "clusteruserdefinednetwork", cudnName, "--wait", "--timeout=60s", "--ignore-not-found") + } + + // Helper to delete a UDN + deleteUDN := func(namespace, udnName string) { + _, _ = e2ekubectl.RunKubectl(namespace, "delete", "userdefinednetwork", udnName, "--wait", "--timeout=60s", "--ignore-not-found") + } + + // Helper to get CNC annotations + getCNCAnnotations := func(cncName string) (map[string]string, error) { + annotationsJSON, err := e2ekubectl.RunKubectl("", "get", "clusternetworkconnect", cncName, "-o", "jsonpath={.metadata.annotations}") + if err != nil { + return nil, err + } + if annotationsJSON == "" { + return map[string]string{}, nil + } + var annotations map[string]string + if err := json.Unmarshal([]byte(annotationsJSON), &annotations); err != nil { + return nil, err + } + return annotations, nil + } + + // Helper to verify CNC has only tunnel ID annotation + verifyCNCHasOnlyTunnelIDAnnotation := func(cncName string) { + Eventually(func(g Gomega) { + annotations, err := getCNCAnnotations(cncName) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(annotations).To(HaveKey(ovnConnectRouterTunnelKeyAnnotation), "CNC should have tunnel ID annotation") + if subnetAnnotation, exists := annotations[ovnNetworkConnectSubnetAnnotation]; exists { + g.Expect(subnetAnnotation).To(Equal("{}"), "subnet annotation should be empty when no networks match") + } + }, 30*time.Second, 1*time.Second).Should(Succeed()) + } + + // Helper to verify CNC has both annotations + verifyCNCHasBothAnnotations := func(cncName string) { + Eventually(func(g Gomega) { + annotations, err := 
getCNCAnnotations(cncName) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(annotations).To(HaveKey(ovnConnectRouterTunnelKeyAnnotation), "CNC should have tunnel ID annotation") + g.Expect(annotations).To(HaveKey(ovnNetworkConnectSubnetAnnotation), "CNC should have subnet annotation") + subnetAnnotation := annotations[ovnNetworkConnectSubnetAnnotation] + g.Expect(subnetAnnotation).NotTo(Equal("{}"), "subnet annotation should not be empty when networks match") + var subnets map[string]cncAnnotationSubnet + err = json.Unmarshal([]byte(subnetAnnotation), &subnets) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(len(subnets)).To(BeNumerically(">", 0), "should have at least one network subnet") + }, 60*time.Second, 2*time.Second).Should(Succeed()) + } + + // Helper to verify CNC subnet annotation count + verifyCNCSubnetAnnotationNetworkCount := func(cncName string, expectedCount int) { + Eventually(func(g Gomega) { + annotations, err := getCNCAnnotations(cncName) + g.Expect(err).NotTo(HaveOccurred()) + subnetAnnotation := annotations[ovnNetworkConnectSubnetAnnotation] + var subnets map[string]cncAnnotationSubnet + err = json.Unmarshal([]byte(subnetAnnotation), &subnets) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(len(subnets)).To(Equal(expectedCount), fmt.Sprintf("should have %d network subnets", expectedCount)) + }, 60*time.Second, 2*time.Second).Should(Succeed()) + } + + // Helper to verify subnet annotation content: key format, topology counts, and CIDR format + // expectedTopologies is a list of expected topologies (e.g., ["Layer3", "Layer2", "Layer3"]) + verifyCNCSubnetAnnotationContent := func(cncName string, expectedTopologies []string) { + Eventually(func(g Gomega) { + annotations, err := getCNCAnnotations(cncName) + g.Expect(err).NotTo(HaveOccurred()) + subnetAnnotation := annotations[ovnNetworkConnectSubnetAnnotation] + var subnets map[string]cncAnnotationSubnet + err = json.Unmarshal([]byte(subnetAnnotation), &subnets) + g.Expect(err).NotTo(HaveOccurred()) + + // Count topologies found + topologyCounts := map[string]int{"layer2": 0, "layer3": 0} + for networkKey, subnet := range subnets { + // Key format should be _ e.g., "layer3_1", "layer2_2" + g.Expect(networkKey).To(MatchRegexp(`^(layer2|layer3)_\d+$`), + fmt.Sprintf("network key %s should match format _", networkKey)) + + if strings.HasPrefix(networkKey, "layer2_") { + topologyCounts["layer2"]++ + } else if strings.HasPrefix(networkKey, "layer3_") { + topologyCounts["layer3"]++ + } + + // Verify at least one of IPv4 or IPv6 is present + hasIPv4 := subnet.IPv4 != "" + hasIPv6 := subnet.IPv6 != "" + g.Expect(hasIPv4 || hasIPv6).To(BeTrue(), + fmt.Sprintf("network %s should have at least one subnet", networkKey)) + + isLayer2 := strings.HasPrefix(networkKey, "layer2_") + + // Verify IPv4 format if present (should be CIDR within connectSubnets range) + if hasIPv4 { + g.Expect(subnet.IPv4).To(MatchRegexp(`^192\.168\.\d+\.\d+/\d+$`), + fmt.Sprintf("network %s IPv4 subnet should be in connectSubnets range", networkKey)) + // Layer2 networks use point-to-point /31 subnets + if isLayer2 { + g.Expect(subnet.IPv4).To(HaveSuffix("/31"), + fmt.Sprintf("Layer2 network %s IPv4 should have /31 mask", networkKey)) + } + } + + // Verify IPv6 format if present (should be CIDR within connectSubnets range) + if hasIPv6 { + g.Expect(subnet.IPv6).To(MatchRegexp(`^fd00:10::[0-9a-f:]*/\d+$`), + fmt.Sprintf("network %s IPv6 subnet should be in connectSubnets range", networkKey)) + // Layer2 networks use point-to-point /127 subnets + if isLayer2 { + 
g.Expect(subnet.IPv6).To(HaveSuffix("/127"), + fmt.Sprintf("Layer2 network %s IPv6 should have /127 mask", networkKey)) + } + } + } + + // Verify expected topology counts match + expectedCounts := map[string]int{"layer2": 0, "layer3": 0} + for _, topo := range expectedTopologies { + expectedCounts[strings.ToLower(topo)]++ + } + g.Expect(topologyCounts["layer2"]).To(Equal(expectedCounts["layer2"]), + fmt.Sprintf("expected %d Layer2 networks, got %d", expectedCounts["layer2"], topologyCounts["layer2"])) + g.Expect(topologyCounts["layer3"]).To(Equal(expectedCounts["layer3"]), + fmt.Sprintf("expected %d Layer3 networks, got %d", expectedCounts["layer3"], topologyCounts["layer3"])) + }, 60*time.Second, 2*time.Second).Should(Succeed()) + } + + // Helper to get CNC tunnel ID + getCNCTunnelID := func(cncName string) string { + annotations, err := getCNCAnnotations(cncName) + Expect(err).NotTo(HaveOccurred()) + return annotations[ovnConnectRouterTunnelKeyAnnotation] + } + + // =========================================== + // Group 1: No Matching Networks (1 test) + // =========================================== + Context("when CNC has no matching networks", func() { + It("has only tunnel ID annotation", func() { + cncName := generateCNCName() + DeferCleanup(func() { + deleteCNC(cncName) + }) + + By("creating a CNC with selector that matches no networks") + createOrUpdateCNC(cncName, map[string]string{"nonexistent": "label"}, nil) + + By("verifying CNC has only tunnel ID annotation") + verifyCNCHasOnlyTunnelIDAnnotation(cncName) + + By("verifying tunnel ID is valid") + tunnelID := getCNCTunnelID(cncName) + Expect(tunnelID).NotTo(BeEmpty(), "CNC should have tunnel ID even with no matching networks") + }) + }) + + // =========================================== + // Group 2: Static Creation - Networks exist first, then CNC created (7 tests) + // =========================================== + Context("when networks exist before CNC creation", func() { + // Single network tests using DescribeTable + DescribeTable("single network: has both subnet and tunnel ID annotations", + func(topology, kind string) { + cncName := generateCNCName() + networkName := fmt.Sprintf("test-%s-%s", strings.ToLower(kind), rand.String(5)) + testLabel := map[string]string{fmt.Sprintf("test-%s-%s", strings.ToLower(kind), strings.ToLower(topology)): "true"} + + if kind == "UDN" { + ns := createUDNNamespace(fmt.Sprintf("test-%s-%s", strings.ToLower(kind), strings.ToLower(topology)), testLabel) + DeferCleanup(func() { + deleteCNC(cncName) + deleteUDN(ns.Name, networkName) + cs.CoreV1().Namespaces().Delete(context.Background(), ns.Name, metav1.DeleteOptions{}) + }) + + By(fmt.Sprintf("creating a %s primary UDN", topology)) + createPrimaryUDN(ns.Name, networkName, topology) + + By("waiting for UDN to be ready") + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, ns.Name, networkName), 30*time.Second, time.Second).Should(Succeed()) + + By("creating a CNC with PUDN selector") + createOrUpdateCNC(cncName, nil, testLabel) + } else { + ns := createUDNNamespace(fmt.Sprintf("test-%s-%s", strings.ToLower(kind), strings.ToLower(topology)), nil) + DeferCleanup(func() { + deleteCNC(cncName) + deleteCUDN(networkName) + cs.CoreV1().Namespaces().Delete(context.Background(), ns.Name, metav1.DeleteOptions{}) + }) + + By(fmt.Sprintf("creating a %s primary CUDN", topology)) + createPrimaryCUDN(networkName, topology, testLabel, ns.Name) + + By("waiting for CUDN to be ready") + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, 
networkName), 30*time.Second, time.Second).Should(Succeed()) + + By("creating a CNC with CUDN selector") + createOrUpdateCNC(cncName, testLabel, nil) + } + + By("verifying CNC has both subnet and tunnel ID annotations") + verifyCNCHasBothAnnotations(cncName) + verifyCNCSubnetAnnotationNetworkCount(cncName, 1) + verifyCNCSubnetAnnotationContent(cncName, []string{topology}) + }, + Entry("L3 P-UDN", "Layer3", "UDN"), + Entry("L2 P-UDN", "Layer2", "UDN"), + Entry("L3 P-CUDN", "Layer3", "CUDN"), + Entry("L2 P-CUDN", "Layer2", "CUDN"), + ) + + // Multiple networks of same kind tests using DescribeTable + DescribeTable("multiple networks (2xL3 + 2xL2): has all networks in subnet annotation", + func(kind string) { + cncName := generateCNCName() + testLabel := map[string]string{fmt.Sprintf("test-multi-%s", strings.ToLower(kind)): "true"} + var namespaces []*corev1.Namespace + var networkNames []string + var expectedTopologies []string + + if kind == "UDN" { + // Create 4 namespaces with the same label for PUDN selector + for i := 1; i <= 4; i++ { + namespaces = append(namespaces, createUDNNamespace(fmt.Sprintf("test-udn-%d", i), testLabel)) + networkNames = append(networkNames, fmt.Sprintf("udn%d", i)) + } + + DeferCleanup(func() { + deleteCNC(cncName) + for i, ns := range namespaces { + deleteUDN(ns.Name, networkNames[i]) + cs.CoreV1().Namespaces().Delete(context.Background(), ns.Name, metav1.DeleteOptions{}) + } + }) + + By("creating 2 Layer3 and 2 Layer2 primary UDNs") + createLayer3PrimaryUDN(namespaces[0].Name, networkNames[0]) + expectedTopologies = append(expectedTopologies, "Layer3") + createLayer3PrimaryUDN(namespaces[1].Name, networkNames[1]) + expectedTopologies = append(expectedTopologies, "Layer3") + createLayer2PrimaryUDN(namespaces[2].Name, networkNames[2]) + expectedTopologies = append(expectedTopologies, "Layer2") + createLayer2PrimaryUDN(namespaces[3].Name, networkNames[3]) + expectedTopologies = append(expectedTopologies, "Layer2") + + By("waiting for all UDNs to be ready") + for i, ns := range namespaces { + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, ns.Name, networkNames[i]), 30*time.Second, time.Second).Should(Succeed()) + } + + By("creating a CNC with PUDN selector") + createOrUpdateCNC(cncName, nil, testLabel) + } else { + // CUDN case - one CUDN targets multiple namespaces + for i := 1; i <= 5; i++ { // 5 namespaces for multi-ns CUDN test + namespaces = append(namespaces, createUDNNamespace(fmt.Sprintf("test-cudn-ns%d", i), nil)) + networkNames = append(networkNames, fmt.Sprintf("cudn-%d-%s", i, rand.String(5))) + } + + DeferCleanup(func() { + deleteCNC(cncName) + for i := 0; i < 4; i++ { // only 4 CUDNs + deleteCUDN(networkNames[i]) + } + for _, ns := range namespaces { + cs.CoreV1().Namespaces().Delete(context.Background(), ns.Name, metav1.DeleteOptions{}) + } + }) + + By("creating 2 Layer3 and 2 Layer2 primary CUDNs (one L3 targets multiple namespaces)") + createLayer3PrimaryCUDN(networkNames[0], testLabel, namespaces[0].Name) + expectedTopologies = append(expectedTopologies, "Layer3") + createLayer3PrimaryCUDN(networkNames[1], testLabel, namespaces[1].Name, namespaces[4].Name) // multi-ns + expectedTopologies = append(expectedTopologies, "Layer3") + createLayer2PrimaryCUDN(networkNames[2], testLabel, namespaces[2].Name) + expectedTopologies = append(expectedTopologies, "Layer2") + createLayer2PrimaryCUDN(networkNames[3], testLabel, namespaces[3].Name) + expectedTopologies = append(expectedTopologies, "Layer2") + + By("waiting for all CUDNs to be ready") + 
for i := 0; i < 4; i++ { + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, networkNames[i]), 30*time.Second, time.Second).Should(Succeed()) + } + + By("creating a CNC with CUDN selector") + createOrUpdateCNC(cncName, testLabel, nil) + } + + By("verifying CNC has 4 networks in subnet annotation") + verifyCNCHasBothAnnotations(cncName) + verifyCNCSubnetAnnotationNetworkCount(cncName, 4) + verifyCNCSubnetAnnotationContent(cncName, expectedTopologies) + }, + Entry("P-UDNs", "UDN"), + Entry("P-CUDNs (one multi-ns)", "CUDN"), + ) + + It("full matrix (2x each type) - has all 8 networks in subnet annotation", func() { + cncName := generateCNCName() + cudnLabel := map[string]string{"test-full-matrix": "true"} + pudnLabel := map[string]string{"test-full-matrix": "true"} + + var cudnNames []string + var udnNames []string + var cudnNamespaces []*corev1.Namespace + var udnNamespaces []*corev1.Namespace + var expectedTopologies []string + + // Create namespaces and network names + for i := 1; i <= 4; i++ { + cudnNames = append(cudnNames, fmt.Sprintf("fm-cudn-%d-%s", i, rand.String(5))) + udnNames = append(udnNames, fmt.Sprintf("udn%d", i)) + cudnNamespaces = append(cudnNamespaces, createUDNNamespace(fmt.Sprintf("fm-cudn-ns%d", i), nil)) + udnNamespaces = append(udnNamespaces, createUDNNamespace(fmt.Sprintf("fm-udn-ns%d", i), pudnLabel)) + } + + DeferCleanup(func() { + deleteCNC(cncName) + for _, name := range cudnNames { + deleteCUDN(name) + } + for i, ns := range udnNamespaces { + deleteUDN(ns.Name, udnNames[i]) + } + for _, ns := range cudnNamespaces { + cs.CoreV1().Namespaces().Delete(context.Background(), ns.Name, metav1.DeleteOptions{}) + } + for _, ns := range udnNamespaces { + cs.CoreV1().Namespaces().Delete(context.Background(), ns.Name, metav1.DeleteOptions{}) + } + }) + + By("creating 4 CUDNs (2xL3 + 2xL2)") + createLayer3PrimaryCUDN(cudnNames[0], cudnLabel, cudnNamespaces[0].Name) + expectedTopologies = append(expectedTopologies, "Layer3") + createLayer3PrimaryCUDN(cudnNames[1], cudnLabel, cudnNamespaces[1].Name) + expectedTopologies = append(expectedTopologies, "Layer3") + createLayer2PrimaryCUDN(cudnNames[2], cudnLabel, cudnNamespaces[2].Name) + expectedTopologies = append(expectedTopologies, "Layer2") + createLayer2PrimaryCUDN(cudnNames[3], cudnLabel, cudnNamespaces[3].Name) + expectedTopologies = append(expectedTopologies, "Layer2") + + By("creating 4 UDNs (2xL3 + 2xL2)") + createLayer3PrimaryUDN(udnNamespaces[0].Name, udnNames[0]) + expectedTopologies = append(expectedTopologies, "Layer3") + createLayer3PrimaryUDN(udnNamespaces[1].Name, udnNames[1]) + expectedTopologies = append(expectedTopologies, "Layer3") + createLayer2PrimaryUDN(udnNamespaces[2].Name, udnNames[2]) + expectedTopologies = append(expectedTopologies, "Layer2") + createLayer2PrimaryUDN(udnNamespaces[3].Name, udnNames[3]) + expectedTopologies = append(expectedTopologies, "Layer2") + + By("waiting for all networks to be ready") + for _, name := range cudnNames { + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, name), 30*time.Second, time.Second).Should(Succeed()) + } + for i, ns := range udnNamespaces { + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, ns.Name, udnNames[i]), 30*time.Second, time.Second).Should(Succeed()) + } + + By("creating a CNC with both CUDN and PUDN selectors") + createOrUpdateCNC(cncName, cudnLabel, pudnLabel) + + By("verifying CNC has all 8 networks in subnet annotation") + verifyCNCHasBothAnnotations(cncName) + verifyCNCSubnetAnnotationNetworkCount(cncName, 8) 
+ verifyCNCSubnetAnnotationContent(cncName, expectedTopologies) + }) + }) + + // =========================================== + // Group 3: Dynamic Creation - CNC created first, then networks (7 tests) + // =========================================== + Context("when CNC is created before networks", func() { + // Single network tests using DescribeTable + DescribeTable("single network created after CNC: annotations are updated", + func(topology, kind string) { + cncName := generateCNCName() + networkName := fmt.Sprintf("test-%s-%s", strings.ToLower(kind), rand.String(5)) + testLabel := map[string]string{fmt.Sprintf("test-dyn-%s-%s", strings.ToLower(kind), strings.ToLower(topology)): "true"} + var expectedTopologies []string + + if kind == "UDN" { + ns := createUDNNamespace(fmt.Sprintf("test-dyn-%s-%s", strings.ToLower(kind), strings.ToLower(topology)), testLabel) + DeferCleanup(func() { + deleteCNC(cncName) + deleteUDN(ns.Name, networkName) + cs.CoreV1().Namespaces().Delete(context.Background(), ns.Name, metav1.DeleteOptions{}) + }) + + By("creating a CNC with PUDN selector (no matching networks yet)") + createOrUpdateCNC(cncName, nil, testLabel) + + By("verifying CNC has only tunnel ID annotation initially") + verifyCNCHasOnlyTunnelIDAnnotation(cncName) + + By(fmt.Sprintf("creating a %s primary UDN", topology)) + createPrimaryUDN(ns.Name, networkName, topology) + expectedTopologies = append(expectedTopologies, topology) + + By("waiting for UDN to be ready") + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, ns.Name, networkName), 30*time.Second, time.Second).Should(Succeed()) + } else { + ns := createUDNNamespace(fmt.Sprintf("test-dyn-%s-%s", strings.ToLower(kind), strings.ToLower(topology)), nil) + DeferCleanup(func() { + deleteCNC(cncName) + deleteCUDN(networkName) + cs.CoreV1().Namespaces().Delete(context.Background(), ns.Name, metav1.DeleteOptions{}) + }) + + By("creating a CNC with CUDN selector (no matching networks yet)") + createOrUpdateCNC(cncName, testLabel, nil) + + By("verifying CNC has only tunnel ID annotation initially") + verifyCNCHasOnlyTunnelIDAnnotation(cncName) + + By(fmt.Sprintf("creating a %s primary CUDN", topology)) + createPrimaryCUDN(networkName, topology, testLabel, ns.Name) + expectedTopologies = append(expectedTopologies, topology) + + By("waiting for CUDN to be ready") + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, networkName), 30*time.Second, time.Second).Should(Succeed()) + } + + By("verifying CNC annotations are updated to include the network") + verifyCNCHasBothAnnotations(cncName) + verifyCNCSubnetAnnotationNetworkCount(cncName, 1) + verifyCNCSubnetAnnotationContent(cncName, expectedTopologies) + }, + Entry("L3 P-UDN", "Layer3", "UDN"), + Entry("L2 P-UDN", "Layer2", "UDN"), + Entry("L3 P-CUDN", "Layer3", "CUDN"), + Entry("L2 P-CUDN", "Layer2", "CUDN"), + ) + + // Multiple networks created after CNC + DescribeTable("multiple networks created after CNC: annotations are updated", + func(kind string) { + cncName := generateCNCName() + testLabel := map[string]string{fmt.Sprintf("test-dyn-multi-%s", strings.ToLower(kind)): "true"} + var namespaces []*corev1.Namespace + var networkNames []string + var expectedTopologies []string + + if kind == "UDN" { + // Create namespaces first (with label for PUDN selector) + for i := 1; i <= 4; i++ { + namespaces = append(namespaces, createUDNNamespace(fmt.Sprintf("test-dyn-udn-%d", i), testLabel)) + networkNames = append(networkNames, fmt.Sprintf("udn%d", i)) + } + + DeferCleanup(func() { + 
deleteCNC(cncName) + for i, ns := range namespaces { + deleteUDN(ns.Name, networkNames[i]) + cs.CoreV1().Namespaces().Delete(context.Background(), ns.Name, metav1.DeleteOptions{}) + } + }) + + By("creating a CNC with PUDN selector (no matching networks yet)") + createOrUpdateCNC(cncName, nil, testLabel) + + By("verifying CNC has only tunnel ID annotation initially") + verifyCNCHasOnlyTunnelIDAnnotation(cncName) + + By("creating 2 Layer3 and 2 Layer2 primary UDNs") + createLayer3PrimaryUDN(namespaces[0].Name, networkNames[0]) + expectedTopologies = append(expectedTopologies, "Layer3") + createLayer3PrimaryUDN(namespaces[1].Name, networkNames[1]) + expectedTopologies = append(expectedTopologies, "Layer3") + createLayer2PrimaryUDN(namespaces[2].Name, networkNames[2]) + expectedTopologies = append(expectedTopologies, "Layer2") + createLayer2PrimaryUDN(namespaces[3].Name, networkNames[3]) + expectedTopologies = append(expectedTopologies, "Layer2") + + By("waiting for all UDNs to be ready") + for i, ns := range namespaces { + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, ns.Name, networkNames[i]), 30*time.Second, time.Second).Should(Succeed()) + } + } else { + // CUDN case + for i := 1; i <= 5; i++ { + namespaces = append(namespaces, createUDNNamespace(fmt.Sprintf("test-dyn-cudn-ns%d", i), nil)) + networkNames = append(networkNames, fmt.Sprintf("dyn-cudn-%d-%s", i, rand.String(5))) + } + + DeferCleanup(func() { + deleteCNC(cncName) + for i := 0; i < 4; i++ { + deleteCUDN(networkNames[i]) + } + for _, ns := range namespaces { + cs.CoreV1().Namespaces().Delete(context.Background(), ns.Name, metav1.DeleteOptions{}) + } + }) + + By("creating a CNC with CUDN selector (no matching networks yet)") + createOrUpdateCNC(cncName, testLabel, nil) + + By("verifying CNC has only tunnel ID annotation initially") + verifyCNCHasOnlyTunnelIDAnnotation(cncName) + + By("creating 2 Layer3 and 2 Layer2 primary CUDNs (one L3 targets multiple namespaces)") + createLayer3PrimaryCUDN(networkNames[0], testLabel, namespaces[0].Name) + expectedTopologies = append(expectedTopologies, "Layer3") + createLayer3PrimaryCUDN(networkNames[1], testLabel, namespaces[1].Name, namespaces[4].Name) + expectedTopologies = append(expectedTopologies, "Layer3") + createLayer2PrimaryCUDN(networkNames[2], testLabel, namespaces[2].Name) + expectedTopologies = append(expectedTopologies, "Layer2") + createLayer2PrimaryCUDN(networkNames[3], testLabel, namespaces[3].Name) + expectedTopologies = append(expectedTopologies, "Layer2") + + By("waiting for all CUDNs to be ready") + for i := 0; i < 4; i++ { + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, networkNames[i]), 30*time.Second, time.Second).Should(Succeed()) + } + } + + By("verifying CNC has 4 networks in subnet annotation") + verifyCNCHasBothAnnotations(cncName) + verifyCNCSubnetAnnotationNetworkCount(cncName, 4) + verifyCNCSubnetAnnotationContent(cncName, expectedTopologies) + }, + Entry("P-UDNs", "UDN"), + Entry("P-CUDNs (one multi-ns)", "CUDN"), + ) + + It("full matrix created after CNC - annotations are updated with all 8 networks", func() { + cncName := generateCNCName() + cudnLabel := map[string]string{"test-dyn-full-matrix": "true"} + pudnLabel := map[string]string{"test-dyn-full-matrix": "true"} + + var cudnNames []string + var udnNames []string + var cudnNamespaces []*corev1.Namespace + var udnNamespaces []*corev1.Namespace + var expectedTopologies []string + + // Create namespaces first + for i := 1; i <= 4; i++ { + cudnNames = append(cudnNames, 
fmt.Sprintf("dyn-fm-cudn-%d-%s", i, rand.String(5))) + udnNames = append(udnNames, fmt.Sprintf("udn%d", i)) + cudnNamespaces = append(cudnNamespaces, createUDNNamespace(fmt.Sprintf("dyn-fm-cudn-ns%d", i), nil)) + udnNamespaces = append(udnNamespaces, createUDNNamespace(fmt.Sprintf("dyn-fm-udn-ns%d", i), pudnLabel)) + } + + DeferCleanup(func() { + deleteCNC(cncName) + for _, name := range cudnNames { + deleteCUDN(name) + } + for i, ns := range udnNamespaces { + deleteUDN(ns.Name, udnNames[i]) + } + for _, ns := range cudnNamespaces { + cs.CoreV1().Namespaces().Delete(context.Background(), ns.Name, metav1.DeleteOptions{}) + } + for _, ns := range udnNamespaces { + cs.CoreV1().Namespaces().Delete(context.Background(), ns.Name, metav1.DeleteOptions{}) + } + }) + + By("creating a CNC with both CUDN and PUDN selectors (no matching networks yet)") + createOrUpdateCNC(cncName, cudnLabel, pudnLabel) + + By("verifying CNC has only tunnel ID annotation initially") + verifyCNCHasOnlyTunnelIDAnnotation(cncName) + + By("creating 4 CUDNs (2xL3 + 2xL2)") + createLayer3PrimaryCUDN(cudnNames[0], cudnLabel, cudnNamespaces[0].Name) + expectedTopologies = append(expectedTopologies, "Layer3") + createLayer3PrimaryCUDN(cudnNames[1], cudnLabel, cudnNamespaces[1].Name) + expectedTopologies = append(expectedTopologies, "Layer3") + createLayer2PrimaryCUDN(cudnNames[2], cudnLabel, cudnNamespaces[2].Name) + expectedTopologies = append(expectedTopologies, "Layer2") + createLayer2PrimaryCUDN(cudnNames[3], cudnLabel, cudnNamespaces[3].Name) + expectedTopologies = append(expectedTopologies, "Layer2") + + By("creating 4 UDNs (2xL3 + 2xL2)") + createLayer3PrimaryUDN(udnNamespaces[0].Name, udnNames[0]) + expectedTopologies = append(expectedTopologies, "Layer3") + createLayer3PrimaryUDN(udnNamespaces[1].Name, udnNames[1]) + expectedTopologies = append(expectedTopologies, "Layer3") + createLayer2PrimaryUDN(udnNamespaces[2].Name, udnNames[2]) + expectedTopologies = append(expectedTopologies, "Layer2") + createLayer2PrimaryUDN(udnNamespaces[3].Name, udnNames[3]) + expectedTopologies = append(expectedTopologies, "Layer2") + + By("waiting for all networks to be ready") + for _, name := range cudnNames { + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, name), 30*time.Second, time.Second).Should(Succeed()) + } + for i, ns := range udnNamespaces { + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, ns.Name, udnNames[i]), 30*time.Second, time.Second).Should(Succeed()) + } + + By("verifying CNC has all 8 networks in subnet annotation") + verifyCNCHasBothAnnotations(cncName) + verifyCNCSubnetAnnotationNetworkCount(cncName, 8) + verifyCNCSubnetAnnotationContent(cncName, expectedTopologies) + }) + }) + + // =========================================== + // Group 4: Adding Networks - networks added to existing CNC (4 tests) + // =========================================== + Context("when networks are added to existing CNC", func() { + // Adding single network to CNC with existing networks + DescribeTable("adding a network to CNC with existing networks: count increases", + func(initialTopology, addedTopology, kind string) { + cncName := generateCNCName() + testLabel := map[string]string{fmt.Sprintf("test-add-%s", strings.ToLower(kind)): "true"} + var namespaces []*corev1.Namespace + var networkNames []string + var expectedTopologies []string + + if kind == "UDN" { + // Create 2 namespaces - one for initial, one for added + for i := 1; i <= 2; i++ { + namespaces = append(namespaces, 
createUDNNamespace(fmt.Sprintf("test-add-udn-%d", i), testLabel)) + networkNames = append(networkNames, fmt.Sprintf("udn%d", i)) + } + + DeferCleanup(func() { + deleteCNC(cncName) + for i, ns := range namespaces { + deleteUDN(ns.Name, networkNames[i]) + cs.CoreV1().Namespaces().Delete(context.Background(), ns.Name, metav1.DeleteOptions{}) + } + }) + + By(fmt.Sprintf("creating initial %s primary UDN", initialTopology)) + createPrimaryUDN(namespaces[0].Name, networkNames[0], initialTopology) + expectedTopologies = append(expectedTopologies, initialTopology) + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, namespaces[0].Name, networkNames[0]), 30*time.Second, time.Second).Should(Succeed()) + + By("creating CNC with PUDN selector") + createOrUpdateCNC(cncName, nil, testLabel) + + By("verifying CNC has 1 network in subnet annotation") + verifyCNCHasBothAnnotations(cncName) + verifyCNCSubnetAnnotationNetworkCount(cncName, 1) + verifyCNCSubnetAnnotationContent(cncName, expectedTopologies) + + By(fmt.Sprintf("adding a %s primary UDN", addedTopology)) + createPrimaryUDN(namespaces[1].Name, networkNames[1], addedTopology) + expectedTopologies = append(expectedTopologies, addedTopology) + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, namespaces[1].Name, networkNames[1]), 30*time.Second, time.Second).Should(Succeed()) + } else { + // CUDN case + for i := 1; i <= 2; i++ { + namespaces = append(namespaces, createUDNNamespace(fmt.Sprintf("test-add-cudn-ns%d", i), nil)) + networkNames = append(networkNames, fmt.Sprintf("add-cudn-%d-%s", i, rand.String(5))) + } + + DeferCleanup(func() { + deleteCNC(cncName) + for _, name := range networkNames { + deleteCUDN(name) + } + for _, ns := range namespaces { + cs.CoreV1().Namespaces().Delete(context.Background(), ns.Name, metav1.DeleteOptions{}) + } + }) + + By(fmt.Sprintf("creating initial %s primary CUDN", initialTopology)) + createPrimaryCUDN(networkNames[0], initialTopology, testLabel, namespaces[0].Name) + expectedTopologies = append(expectedTopologies, initialTopology) + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, networkNames[0]), 30*time.Second, time.Second).Should(Succeed()) + + By("creating CNC with CUDN selector") + createOrUpdateCNC(cncName, testLabel, nil) + + By("verifying CNC has 1 network in subnet annotation") + verifyCNCHasBothAnnotations(cncName) + verifyCNCSubnetAnnotationNetworkCount(cncName, 1) + verifyCNCSubnetAnnotationContent(cncName, expectedTopologies) + + By(fmt.Sprintf("adding a %s primary CUDN", addedTopology)) + createPrimaryCUDN(networkNames[1], addedTopology, testLabel, namespaces[1].Name) + expectedTopologies = append(expectedTopologies, addedTopology) + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, networkNames[1]), 30*time.Second, time.Second).Should(Succeed()) + } + + By("verifying CNC now has 2 networks in subnet annotation") + verifyCNCSubnetAnnotationNetworkCount(cncName, 2) + verifyCNCSubnetAnnotationContent(cncName, expectedTopologies) + }, + Entry("add L2 P-UDN to L3 P-UDN", "Layer3", "Layer2", "UDN"), + Entry("add L3 P-UDN to L2 P-UDN", "Layer2", "Layer3", "UDN"), + Entry("add L2 P-CUDN to L3 P-CUDN", "Layer3", "Layer2", "CUDN"), + Entry("add L3 P-CUDN to L2 P-CUDN", "Layer2", "Layer3", "CUDN"), + ) + + It("adding mixed networks (P-UDN + P-CUDN) to existing CNC - all networks appear", func() { + cncName := generateCNCName() + cudnLabel := map[string]string{"test-add-mixed": "true"} + pudnLabel := map[string]string{"test-add-mixed": "true"} + var expectedTopologies 
[]string + + // Initial: 1 L3 CUDN + 1 L3 UDN + initialCudnName := fmt.Sprintf("add-mixed-cudn-init-%s", rand.String(5)) + initialUdnName := "udn-init" + cudnNs := createUDNNamespace("test-add-mixed-cudn", nil) + udnNs := createUDNNamespace("test-add-mixed-udn", pudnLabel) + + // Added: 1 L2 CUDN + 1 L2 UDN + addedCudnName := fmt.Sprintf("add-mixed-cudn-add-%s", rand.String(5)) + addedUdnName := "udn-add" + addedCudnNs := createUDNNamespace("test-add-mixed-cudn2", nil) + addedUdnNs := createUDNNamespace("test-add-mixed-udn2", pudnLabel) + + DeferCleanup(func() { + deleteCNC(cncName) + deleteCUDN(initialCudnName) + deleteCUDN(addedCudnName) + deleteUDN(udnNs.Name, initialUdnName) + deleteUDN(addedUdnNs.Name, addedUdnName) + for _, ns := range []*corev1.Namespace{cudnNs, udnNs, addedCudnNs, addedUdnNs} { + cs.CoreV1().Namespaces().Delete(context.Background(), ns.Name, metav1.DeleteOptions{}) + } + }) + + By("creating initial L3 CUDN and L3 UDN") + createLayer3PrimaryCUDN(initialCudnName, cudnLabel, cudnNs.Name) + expectedTopologies = append(expectedTopologies, "Layer3") + createLayer3PrimaryUDN(udnNs.Name, initialUdnName) + expectedTopologies = append(expectedTopologies, "Layer3") + + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, initialCudnName), 30*time.Second, time.Second).Should(Succeed()) + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, udnNs.Name, initialUdnName), 30*time.Second, time.Second).Should(Succeed()) + + By("creating CNC with both selectors") + createOrUpdateCNC(cncName, cudnLabel, pudnLabel) + + By("verifying CNC has 2 networks initially") + verifyCNCHasBothAnnotations(cncName) + verifyCNCSubnetAnnotationNetworkCount(cncName, 2) + verifyCNCSubnetAnnotationContent(cncName, expectedTopologies) + + By("adding L2 CUDN and L2 UDN") + createLayer2PrimaryCUDN(addedCudnName, cudnLabel, addedCudnNs.Name) + expectedTopologies = append(expectedTopologies, "Layer2") + createLayer2PrimaryUDN(addedUdnNs.Name, addedUdnName) + expectedTopologies = append(expectedTopologies, "Layer2") + + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, addedCudnName), 30*time.Second, time.Second).Should(Succeed()) + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, addedUdnNs.Name, addedUdnName), 30*time.Second, time.Second).Should(Succeed()) + + By("verifying CNC now has 4 networks") + verifyCNCSubnetAnnotationNetworkCount(cncName, 4) + verifyCNCSubnetAnnotationContent(cncName, expectedTopologies) + }) + }) + + // =========================================== + // Group 5: Network Deletion - networks removed from CNC (4 tests) + // =========================================== + Context("when networks are deleted from CNC", func() { + // Deleting single network from CNC with multiple networks + DescribeTable("deleting networks from CNC: count decreases to zero", + func(topology, kind string) { + cncName := generateCNCName() + testLabel := map[string]string{fmt.Sprintf("test-del-%s", strings.ToLower(kind)): "true"} + var namespaces []*corev1.Namespace + var networkNames []string + + if kind == "UDN" { + // Create 2 namespaces with 2 networks + for i := 1; i <= 2; i++ { + namespaces = append(namespaces, createUDNNamespace(fmt.Sprintf("test-del-udn-%d", i), testLabel)) + networkNames = append(networkNames, fmt.Sprintf("udn%d", i)) + } + + DeferCleanup(func() { + deleteCNC(cncName) + // Networks already deleted in test + for _, ns := range namespaces { + cs.CoreV1().Namespaces().Delete(context.Background(), ns.Name, metav1.DeleteOptions{}) + } + }) + + By("creating 2 
primary UDNs (L3 + topology)") + createLayer3PrimaryUDN(namespaces[0].Name, networkNames[0]) + createPrimaryUDN(namespaces[1].Name, networkNames[1], topology) + for i, ns := range namespaces { + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, ns.Name, networkNames[i]), 30*time.Second, time.Second).Should(Succeed()) + } + + By("creating CNC with PUDN selector") + createOrUpdateCNC(cncName, nil, testLabel) + + By("verifying CNC has 2 networks initially") + verifyCNCHasBothAnnotations(cncName) + verifyCNCSubnetAnnotationNetworkCount(cncName, 2) + verifyCNCSubnetAnnotationContent(cncName, []string{"Layer3", topology}) + + By(fmt.Sprintf("deleting the %s UDN", topology)) + deleteUDN(namespaces[1].Name, networkNames[1]) + + By("verifying CNC now has 1 network") + verifyCNCSubnetAnnotationNetworkCount(cncName, 1) + verifyCNCSubnetAnnotationContent(cncName, []string{"Layer3"}) + + By("deleting the remaining L3 UDN") + deleteUDN(namespaces[0].Name, networkNames[0]) + } else { + // CUDN case + for i := 1; i <= 2; i++ { + namespaces = append(namespaces, createUDNNamespace(fmt.Sprintf("test-del-cudn-ns%d", i), nil)) + networkNames = append(networkNames, fmt.Sprintf("del-cudn-%d-%s", i, rand.String(5))) + } + + DeferCleanup(func() { + deleteCNC(cncName) + // Networks already deleted in test + for _, ns := range namespaces { + cs.CoreV1().Namespaces().Delete(context.Background(), ns.Name, metav1.DeleteOptions{}) + } + }) + + By("creating 2 primary CUDNs (L3 + topology)") + createLayer3PrimaryCUDN(networkNames[0], testLabel, namespaces[0].Name) + createPrimaryCUDN(networkNames[1], topology, testLabel, namespaces[1].Name) + for i := 0; i < 2; i++ { + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, networkNames[i]), 30*time.Second, time.Second).Should(Succeed()) + } + + By("creating CNC with CUDN selector") + createOrUpdateCNC(cncName, testLabel, nil) + + By("verifying CNC has 2 networks initially") + verifyCNCHasBothAnnotations(cncName) + verifyCNCSubnetAnnotationNetworkCount(cncName, 2) + verifyCNCSubnetAnnotationContent(cncName, []string{"Layer3", topology}) + + By(fmt.Sprintf("deleting the %s CUDN", topology)) + deleteCUDN(networkNames[1]) + + By("verifying CNC now has 1 network") + verifyCNCSubnetAnnotationNetworkCount(cncName, 1) + verifyCNCSubnetAnnotationContent(cncName, []string{"Layer3"}) + + By("deleting the remaining L3 CUDN") + deleteCUDN(networkNames[0]) + } + + By("verifying CNC reverts to only tunnel ID annotation") + verifyCNCHasOnlyTunnelIDAnnotation(cncName) + }, + Entry("delete L2 then L3 P-UDN", "Layer2", "UDN"), + Entry("delete L3 then L3 P-UDN", "Layer3", "UDN"), + Entry("delete L2 then L3 P-CUDN", "Layer2", "CUDN"), + Entry("delete L3 then L3 P-CUDN", "Layer3", "CUDN"), + ) + + It("deleting mixed networks (P-UDN + P-CUDN) - annotations update correctly", func() { + cncName := generateCNCName() + cudnLabel := map[string]string{"test-del-mixed": "true"} + pudnLabel := map[string]string{"test-del-mixed": "true"} + + // Create 2 CUDNs + 2 UDNs + cudnNs1 := createUDNNamespace("test-del-mixed-cudn1", nil) + cudnNs2 := createUDNNamespace("test-del-mixed-cudn2", nil) + udnNs1 := createUDNNamespace("test-del-mixed-udn1", pudnLabel) + udnNs2 := createUDNNamespace("test-del-mixed-udn2", pudnLabel) + + cudnName1 := fmt.Sprintf("del-mixed-cudn1-%s", rand.String(5)) + cudnName2 := fmt.Sprintf("del-mixed-cudn2-%s", rand.String(5)) + udnName1 := "udn1" + udnName2 := "udn2" + + DeferCleanup(func() { + deleteCNC(cncName) + // Only delete remaining networks (others deleted in 
test) + deleteCUDN(cudnName1) + deleteUDN(udnNs1.Name, udnName1) + for _, ns := range []*corev1.Namespace{cudnNs1, cudnNs2, udnNs1, udnNs2} { + cs.CoreV1().Namespaces().Delete(context.Background(), ns.Name, metav1.DeleteOptions{}) + } + }) + + By("creating 2 CUDNs (L3 + L2) and 2 UDNs (L3 + L2)") + createLayer3PrimaryCUDN(cudnName1, cudnLabel, cudnNs1.Name) + createLayer2PrimaryCUDN(cudnName2, cudnLabel, cudnNs2.Name) + createLayer3PrimaryUDN(udnNs1.Name, udnName1) + createLayer2PrimaryUDN(udnNs2.Name, udnName2) + + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, cudnName1), 30*time.Second, time.Second).Should(Succeed()) + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, cudnName2), 30*time.Second, time.Second).Should(Succeed()) + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, udnNs1.Name, udnName1), 30*time.Second, time.Second).Should(Succeed()) + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, udnNs2.Name, udnName2), 30*time.Second, time.Second).Should(Succeed()) + + By("creating CNC with both selectors") + createOrUpdateCNC(cncName, cudnLabel, pudnLabel) + + By("verifying CNC has 4 networks initially") + verifyCNCHasBothAnnotations(cncName) + verifyCNCSubnetAnnotationNetworkCount(cncName, 4) + verifyCNCSubnetAnnotationContent(cncName, []string{"Layer3", "Layer2", "Layer3", "Layer2"}) + + By("deleting L2 CUDN and L2 UDN") + deleteCUDN(cudnName2) + deleteUDN(udnNs2.Name, udnName2) + + By("verifying CNC has 2 L3 networks remaining") + verifyCNCSubnetAnnotationNetworkCount(cncName, 2) + verifyCNCSubnetAnnotationContent(cncName, []string{"Layer3", "Layer3"}) + + By("deleting L3 CUDN and L3 UDN") + deleteCUDN(cudnName1) + deleteUDN(udnNs1.Name, udnName1) + + By("verifying CNC has no networks remaining") + verifyCNCSubnetAnnotationNetworkCount(cncName, 0) + verifyCNCSubnetAnnotationContent(cncName, []string{}) + }) + }) + + // =========================================== + // Group 6: CNC Selector Update - CNC spec.networkSelectors changed (4 tests) + // =========================================== + Context("when CNC selector is updated", func() { + It("widening then narrowing CUDN selector - count increases then decreases", func() { + cncName := generateCNCName() + commonLabel := map[string]string{"test-cudn-sel": "true"} + specificLabel := map[string]string{"test-cudn-sel": "true", "specific": "true"} + + ns1 := createUDNNamespace("test-cudn-sel-ns1", nil) + ns2 := createUDNNamespace("test-cudn-sel-ns2", nil) + cudnName1 := fmt.Sprintf("cudn-sel1-%s", rand.String(5)) + cudnName2 := fmt.Sprintf("cudn-sel2-%s", rand.String(5)) + + DeferCleanup(func() { + deleteCNC(cncName) + deleteCUDN(cudnName1) + deleteCUDN(cudnName2) + cs.CoreV1().Namespaces().Delete(context.Background(), ns1.Name, metav1.DeleteOptions{}) + cs.CoreV1().Namespaces().Delete(context.Background(), ns2.Name, metav1.DeleteOptions{}) + }) + + By("creating 2 CUDNs - both with common label, second also has specific label") + createLayer3PrimaryCUDN(cudnName1, commonLabel, ns1.Name) + createLayer2PrimaryCUDN(cudnName2, specificLabel, ns2.Name) + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, cudnName1), 30*time.Second, time.Second).Should(Succeed()) + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, cudnName2), 30*time.Second, time.Second).Should(Succeed()) + + By("creating CNC with specific selector (matches only second CUDN)") + createOrUpdateCNC(cncName, specificLabel, nil) + + By("verifying CNC has 1 network initially") + 
verifyCNCHasBothAnnotations(cncName) + verifyCNCSubnetAnnotationNetworkCount(cncName, 1) + verifyCNCSubnetAnnotationContent(cncName, []string{"Layer2"}) + + By("widening CNC selector to common label - count increases") + createOrUpdateCNC(cncName, commonLabel, nil) + + By("verifying CNC now has 2 networks") + verifyCNCSubnetAnnotationNetworkCount(cncName, 2) + verifyCNCSubnetAnnotationContent(cncName, []string{"Layer3", "Layer2"}) + + By("narrowing CNC selector back to specific - count decreases") + createOrUpdateCNC(cncName, specificLabel, nil) + + By("verifying CNC now has 1 network") + verifyCNCSubnetAnnotationNetworkCount(cncName, 1) + verifyCNCSubnetAnnotationContent(cncName, []string{"Layer2"}) + }) + + It("widening then narrowing PUDN namespace selector - count increases then decreases", func() { + cncName := generateCNCName() + commonLabel := map[string]string{"test-pudn-sel": "true"} + specificLabel := map[string]string{"test-pudn-sel": "true", "specific": "true"} + + ns1 := createUDNNamespace("test-pudn-sel-ns1", commonLabel) + ns2 := createUDNNamespace("test-pudn-sel-ns2", specificLabel) + udnName1 := "udn1" + udnName2 := "udn2" + + DeferCleanup(func() { + deleteCNC(cncName) + deleteUDN(ns1.Name, udnName1) + deleteUDN(ns2.Name, udnName2) + cs.CoreV1().Namespaces().Delete(context.Background(), ns1.Name, metav1.DeleteOptions{}) + cs.CoreV1().Namespaces().Delete(context.Background(), ns2.Name, metav1.DeleteOptions{}) + }) + + By("creating 2 UDNs in namespaces - both with common label, second also has specific") + createLayer3PrimaryUDN(ns1.Name, udnName1) + createLayer2PrimaryUDN(ns2.Name, udnName2) + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, ns1.Name, udnName1), 30*time.Second, time.Second).Should(Succeed()) + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, ns2.Name, udnName2), 30*time.Second, time.Second).Should(Succeed()) + + By("creating CNC with specific selector (matches only second namespace)") + createOrUpdateCNC(cncName, nil, specificLabel) + + By("verifying CNC has 1 network initially") + verifyCNCHasBothAnnotations(cncName) + verifyCNCSubnetAnnotationNetworkCount(cncName, 1) + verifyCNCSubnetAnnotationContent(cncName, []string{"Layer2"}) + + By("widening CNC selector to common label - count increases") + createOrUpdateCNC(cncName, nil, commonLabel) + + By("verifying CNC now has 2 networks") + verifyCNCSubnetAnnotationNetworkCount(cncName, 2) + verifyCNCSubnetAnnotationContent(cncName, []string{"Layer3", "Layer2"}) + + By("narrowing CNC selector back to specific - count decreases") + createOrUpdateCNC(cncName, nil, specificLabel) + + By("verifying CNC now has 1 network") + verifyCNCSubnetAnnotationNetworkCount(cncName, 1) + verifyCNCSubnetAnnotationContent(cncName, []string{"Layer2"}) + }) + + It("adding and removing PUDN selector from CNC - count increases then decreases", func() { + cncName := generateCNCName() + cudnLabel := map[string]string{"test-toggle-pudn-sel": "true"} + pudnLabel := map[string]string{"test-toggle-pudn-sel": "true"} + + cudnNs := createUDNNamespace("test-toggle-pudn-sel-cudn", nil) + udnNs := createUDNNamespace("test-toggle-pudn-sel-udn", pudnLabel) + cudnName := fmt.Sprintf("toggle-pudn-sel-cudn-%s", rand.String(5)) + udnName := "udn1" + + DeferCleanup(func() { + deleteCNC(cncName) + deleteCUDN(cudnName) + deleteUDN(udnNs.Name, udnName) + cs.CoreV1().Namespaces().Delete(context.Background(), cudnNs.Name, metav1.DeleteOptions{}) + cs.CoreV1().Namespaces().Delete(context.Background(), udnNs.Name, metav1.DeleteOptions{}) + }) + 
+ By("creating L3 CUDN and L2 UDN") + createLayer3PrimaryCUDN(cudnName, cudnLabel, cudnNs.Name) + createLayer2PrimaryUDN(udnNs.Name, udnName) + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, cudnName), 30*time.Second, time.Second).Should(Succeed()) + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, udnNs.Name, udnName), 30*time.Second, time.Second).Should(Succeed()) + + By("creating CNC with only CUDN selector") + createOrUpdateCNC(cncName, cudnLabel, nil) + + By("verifying CNC has 1 network initially (CUDN only)") + verifyCNCHasBothAnnotations(cncName) + verifyCNCSubnetAnnotationNetworkCount(cncName, 1) + verifyCNCSubnetAnnotationContent(cncName, []string{"Layer3"}) + + By("adding PUDN selector to CNC - count increases") + createOrUpdateCNC(cncName, cudnLabel, pudnLabel) + + By("verifying CNC now has 2 networks (CUDN + PUDN)") + verifyCNCHasBothAnnotations(cncName) + verifyCNCSubnetAnnotationNetworkCount(cncName, 2) + verifyCNCSubnetAnnotationContent(cncName, []string{"Layer3", "Layer2"}) + + By("removing PUDN selector from CNC - count decreases") + createOrUpdateCNC(cncName, cudnLabel, nil) + + By("verifying CNC now has 1 network (CUDN only)") + verifyCNCHasBothAnnotations(cncName) + verifyCNCSubnetAnnotationNetworkCount(cncName, 1) + verifyCNCSubnetAnnotationContent(cncName, []string{"Layer3"}) + }) + + It("adding and removing CUDN selector from CNC - count increases then decreases", func() { + cncName := generateCNCName() + cudnLabel := map[string]string{"test-toggle-cudn-sel": "true"} + pudnLabel := map[string]string{"test-toggle-cudn-sel": "true"} + + cudnNs := createUDNNamespace("test-toggle-cudn-sel-cudn", nil) + udnNs := createUDNNamespace("test-toggle-cudn-sel-udn", pudnLabel) + cudnName := fmt.Sprintf("toggle-cudn-sel-cudn-%s", rand.String(5)) + udnName := "udn1" + + DeferCleanup(func() { + deleteCNC(cncName) + deleteCUDN(cudnName) + deleteUDN(udnNs.Name, udnName) + cs.CoreV1().Namespaces().Delete(context.Background(), cudnNs.Name, metav1.DeleteOptions{}) + cs.CoreV1().Namespaces().Delete(context.Background(), udnNs.Name, metav1.DeleteOptions{}) + }) + + By("creating L3 CUDN and L2 UDN") + createLayer3PrimaryCUDN(cudnName, cudnLabel, cudnNs.Name) + createLayer2PrimaryUDN(udnNs.Name, udnName) + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, cudnName), 30*time.Second, time.Second).Should(Succeed()) + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, udnNs.Name, udnName), 30*time.Second, time.Second).Should(Succeed()) + + By("creating CNC with only PUDN selector") + createOrUpdateCNC(cncName, nil, pudnLabel) + + By("verifying CNC has 1 network initially (PUDN only)") + verifyCNCHasBothAnnotations(cncName) + verifyCNCSubnetAnnotationNetworkCount(cncName, 1) + verifyCNCSubnetAnnotationContent(cncName, []string{"Layer2"}) + + By("adding CUDN selector to CNC - count increases") + createOrUpdateCNC(cncName, cudnLabel, pudnLabel) + + By("verifying CNC now has 2 networks (CUDN + PUDN)") + verifyCNCHasBothAnnotations(cncName) + verifyCNCSubnetAnnotationNetworkCount(cncName, 2) + verifyCNCSubnetAnnotationContent(cncName, []string{"Layer3", "Layer2"}) + + By("removing CUDN selector from CNC - count decreases") + createOrUpdateCNC(cncName, nil, pudnLabel) + + By("verifying CNC now has 1 network (PUDN only)") + verifyCNCHasBothAnnotations(cncName) + verifyCNCSubnetAnnotationNetworkCount(cncName, 1) + verifyCNCSubnetAnnotationContent(cncName, []string{"Layer2"}) + + By("changing PUDN selector to non-matching label - count decreases to 0") + 
createOrUpdateCNC(cncName, nil, map[string]string{"nonexistent": "label"}) + + By("verifying CNC has no networks remaining") + verifyCNCHasOnlyTunnelIDAnnotation(cncName) // No networks match, so subnet annotation is empty + verifyCNCSubnetAnnotationNetworkCount(cncName, 0) + }) + }) + + // =========================================== + // Group 7: Label Mutation - network/namespace labels changed (2 tests) + // =========================================== + Context("when network or namespace labels are mutated", func() { + It("CUDN label mutation - adding then removing label changes CNC count", func() { + cncName := generateCNCName() + cncLabel := map[string]string{"test-cudn-label": "true"} + + ns1 := createUDNNamespace("test-cudn-label-ns1", nil) + ns2 := createUDNNamespace("test-cudn-label-ns2", nil) + cudnName1 := fmt.Sprintf("cudn-label1-%s", rand.String(5)) + cudnName2 := fmt.Sprintf("cudn-label2-%s", rand.String(5)) + + DeferCleanup(func() { + deleteCNC(cncName) + deleteCUDN(cudnName1) + deleteCUDN(cudnName2) + cs.CoreV1().Namespaces().Delete(context.Background(), ns1.Name, metav1.DeleteOptions{}) + cs.CoreV1().Namespaces().Delete(context.Background(), ns2.Name, metav1.DeleteOptions{}) + }) + + By("creating 2 CUDNs - first with matching label, second without") + createLayer3PrimaryCUDN(cudnName1, cncLabel, ns1.Name) + createLayer2PrimaryCUDN(cudnName2, map[string]string{"other": "label"}, ns2.Name) + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, cudnName1), 30*time.Second, time.Second).Should(Succeed()) + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, cudnName2), 30*time.Second, time.Second).Should(Succeed()) + + By("creating CNC with CUDN selector") + createOrUpdateCNC(cncName, cncLabel, nil) + + By("verifying CNC has 1 network initially") + verifyCNCHasBothAnnotations(cncName) + verifyCNCSubnetAnnotationNetworkCount(cncName, 1) + verifyCNCSubnetAnnotationContent(cncName, []string{"Layer3"}) + + By("adding matching label to second CUDN - count increases") + _, err := e2ekubectl.RunKubectl("", "label", "clusteruserdefinednetwork", cudnName2, "test-cudn-label=true") + Expect(err).NotTo(HaveOccurred()) + + By("verifying CNC now has 2 networks") + verifyCNCSubnetAnnotationNetworkCount(cncName, 2) + verifyCNCSubnetAnnotationContent(cncName, []string{"Layer3", "Layer2"}) + + By("removing matching label from second CUDN - count decreases") + _, err = e2ekubectl.RunKubectl("", "label", "clusteruserdefinednetwork", cudnName2, "test-cudn-label-") + Expect(err).NotTo(HaveOccurred()) + + By("verifying CNC now has 1 network") + verifyCNCSubnetAnnotationNetworkCount(cncName, 1) + verifyCNCSubnetAnnotationContent(cncName, []string{"Layer3"}) + + By("removing matching label from first CUDN - count decreases to 0") + _, err = e2ekubectl.RunKubectl("", "label", "clusteruserdefinednetwork", cudnName1, "test-cudn-label-") + Expect(err).NotTo(HaveOccurred()) + + By("verifying CNC has no networks remaining") + verifyCNCSubnetAnnotationNetworkCount(cncName, 0) + verifyCNCSubnetAnnotationContent(cncName, []string{}) + }) + + It("namespace label mutation - adding then removing label changes CNC count", func() { + cncName := generateCNCName() + cncLabel := map[string]string{"test-ns-label": "true"} + + ns1 := createUDNNamespace("test-ns-label-ns1", cncLabel) + ns2 := createUDNNamespace("test-ns-label-ns2", nil) // no matching label initially + udnName1 := "udn1" + udnName2 := "udn2" + + DeferCleanup(func() { + deleteCNC(cncName) + deleteUDN(ns1.Name, udnName1) + 
deleteUDN(ns2.Name, udnName2) + cs.CoreV1().Namespaces().Delete(context.Background(), ns1.Name, metav1.DeleteOptions{}) + cs.CoreV1().Namespaces().Delete(context.Background(), ns2.Name, metav1.DeleteOptions{}) + }) + + By("creating 2 UDNs - first in namespace with matching label, second without") + createLayer3PrimaryUDN(ns1.Name, udnName1) + createLayer2PrimaryUDN(ns2.Name, udnName2) + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, ns1.Name, udnName1), 30*time.Second, time.Second).Should(Succeed()) + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, ns2.Name, udnName2), 30*time.Second, time.Second).Should(Succeed()) + + By("creating CNC with PUDN namespace selector") + createOrUpdateCNC(cncName, nil, cncLabel) + + By("verifying CNC has 1 network initially") + verifyCNCHasBothAnnotations(cncName) + verifyCNCSubnetAnnotationNetworkCount(cncName, 1) + verifyCNCSubnetAnnotationContent(cncName, []string{"Layer3"}) + + By("adding matching label to second namespace - count increases") + _, err := cs.CoreV1().Namespaces().Patch(context.Background(), ns2.Name, + types.MergePatchType, + []byte(`{"metadata":{"labels":{"test-ns-label":"true"}}}`), + metav1.PatchOptions{}) + Expect(err).NotTo(HaveOccurred()) + + By("verifying CNC now has 2 networks") + verifyCNCSubnetAnnotationNetworkCount(cncName, 2) + verifyCNCSubnetAnnotationContent(cncName, []string{"Layer3", "Layer2"}) + + By("removing matching label from second namespace - count decreases") + _, err = cs.CoreV1().Namespaces().Patch(context.Background(), ns2.Name, + types.MergePatchType, + []byte(`{"metadata":{"labels":{"test-ns-label":null}}}`), + metav1.PatchOptions{}) + Expect(err).NotTo(HaveOccurred()) + + By("verifying CNC now has 1 network") + verifyCNCSubnetAnnotationNetworkCount(cncName, 1) + verifyCNCSubnetAnnotationContent(cncName, []string{"Layer3"}) + + By("removing matching label from first namespace - count decreases to 0") + _, err = cs.CoreV1().Namespaces().Patch(context.Background(), ns1.Name, + types.MergePatchType, + []byte(`{"metadata":{"labels":{"test-ns-label":null}}}`), + metav1.PatchOptions{}) + Expect(err).NotTo(HaveOccurred()) + + By("verifying CNC has no networks remaining") + verifyCNCSubnetAnnotationNetworkCount(cncName, 0) + verifyCNCSubnetAnnotationContent(cncName, []string{}) + }) + }) + + // =========================================== + // Group 8: Multiple CNCs - multiple CNCs in cluster (3 tests) + // =========================================== + Context("when multiple CNCs exist", func() { + It("two CNCs with non-overlapping selectors - each tracks its own networks", func() { + cncName1 := generateCNCName() + cncName2 := generateCNCName() + label1 := map[string]string{"test-multi-cnc-1": "true"} + label2 := map[string]string{"test-multi-cnc-2": "true"} + + ns1 := createUDNNamespace("test-multi-cnc-ns1", nil) + ns2 := createUDNNamespace("test-multi-cnc-ns2", nil) + cudnName1 := fmt.Sprintf("multi-cnc-cudn1-%s", rand.String(5)) + cudnName2 := fmt.Sprintf("multi-cnc-cudn2-%s", rand.String(5)) + + DeferCleanup(func() { + deleteCNC(cncName1) + deleteCNC(cncName2) + deleteCUDN(cudnName1) + deleteCUDN(cudnName2) + cs.CoreV1().Namespaces().Delete(context.Background(), ns1.Name, metav1.DeleteOptions{}) + cs.CoreV1().Namespaces().Delete(context.Background(), ns2.Name, metav1.DeleteOptions{}) + }) + + By("creating 2 CUDNs with different labels") + createLayer3PrimaryCUDN(cudnName1, label1, ns1.Name) + createLayer2PrimaryCUDN(cudnName2, label2, ns2.Name) + 
Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, cudnName1), 30*time.Second, time.Second).Should(Succeed()) + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, cudnName2), 30*time.Second, time.Second).Should(Succeed()) + + By("creating first CNC matching first CUDN") + createOrUpdateCNC(cncName1, label1, nil) + + By("creating second CNC matching second CUDN") + createOrUpdateCNC(cncName2, label2, nil) + + By("verifying first CNC has only first network") + verifyCNCHasBothAnnotations(cncName1) + verifyCNCSubnetAnnotationNetworkCount(cncName1, 1) + verifyCNCSubnetAnnotationContent(cncName1, []string{"Layer3"}) + + By("verifying second CNC has only second network") + verifyCNCHasBothAnnotations(cncName2) + verifyCNCSubnetAnnotationNetworkCount(cncName2, 1) + verifyCNCSubnetAnnotationContent(cncName2, []string{"Layer2"}) + + By("verifying CNCs have different tunnel IDs") + annotations1, err := getCNCAnnotations(cncName1) + Expect(err).NotTo(HaveOccurred()) + annotations2, err := getCNCAnnotations(cncName2) + Expect(err).NotTo(HaveOccurred()) + Expect(annotations1[ovnConnectRouterTunnelKeyAnnotation]).NotTo(Equal(annotations2[ovnConnectRouterTunnelKeyAnnotation]), + "CNCs should have different tunnel IDs") + }) + + It("two CNCs matching same network - both track the network (this works but is usually treated as misconfiguration)", func() { + cncName1 := generateCNCName() + cncName2 := generateCNCName() + sharedLabel := map[string]string{"test-shared-cudn": "true"} + + ns := createUDNNamespace("test-shared-cudn-ns", nil) + cudnName := fmt.Sprintf("shared-cudn-%s", rand.String(5)) + + DeferCleanup(func() { + deleteCNC(cncName1) + deleteCNC(cncName2) + deleteCUDN(cudnName) + cs.CoreV1().Namespaces().Delete(context.Background(), ns.Name, metav1.DeleteOptions{}) + }) + + By("creating a CUDN with shared label") + createLayer3PrimaryCUDN(cudnName, sharedLabel, ns.Name) + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, cudnName), 30*time.Second, time.Second).Should(Succeed()) + + By("creating first CNC matching the CUDN") + createOrUpdateCNC(cncName1, sharedLabel, nil) + + By("creating second CNC also matching the CUDN") + createOrUpdateCNC(cncName2, sharedLabel, nil) + + By("verifying both CNCs have the network in their annotations") + verifyCNCHasBothAnnotations(cncName1) + verifyCNCSubnetAnnotationNetworkCount(cncName1, 1) + verifyCNCSubnetAnnotationContent(cncName1, []string{"Layer3"}) + + verifyCNCHasBothAnnotations(cncName2) + verifyCNCSubnetAnnotationNetworkCount(cncName2, 1) + verifyCNCSubnetAnnotationContent(cncName2, []string{"Layer3"}) + + By("verifying CNCs have different tunnel IDs") + annotations1, err := getCNCAnnotations(cncName1) + Expect(err).NotTo(HaveOccurred()) + annotations2, err := getCNCAnnotations(cncName2) + Expect(err).NotTo(HaveOccurred()) + Expect(annotations1[ovnConnectRouterTunnelKeyAnnotation]).NotTo(Equal(annotations2[ovnConnectRouterTunnelKeyAnnotation]), + "CNCs should have different tunnel IDs") + }) + + It("deleting one CNC does not affect the other", func() { + cncName1 := generateCNCName() + cncName2 := generateCNCName() + label1 := map[string]string{"test-cnc-delete-1": "true"} + label2 := map[string]string{"test-cnc-delete-2": "true"} + + ns1 := createUDNNamespace("test-cnc-delete-ns1", nil) + ns2 := createUDNNamespace("test-cnc-delete-ns2", nil) + cudnName1 := fmt.Sprintf("cnc-delete-cudn1-%s", rand.String(5)) + cudnName2 := fmt.Sprintf("cnc-delete-cudn2-%s", rand.String(5)) + + DeferCleanup(func() { + 
deleteCNC(cncName2) // cncName1 deleted in test + deleteCUDN(cudnName1) + deleteCUDN(cudnName2) + cs.CoreV1().Namespaces().Delete(context.Background(), ns1.Name, metav1.DeleteOptions{}) + cs.CoreV1().Namespaces().Delete(context.Background(), ns2.Name, metav1.DeleteOptions{}) + }) + + By("creating 2 CUDNs with different labels") + createLayer3PrimaryCUDN(cudnName1, label1, ns1.Name) + createLayer2PrimaryCUDN(cudnName2, label2, ns2.Name) + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, cudnName1), 30*time.Second, time.Second).Should(Succeed()) + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, cudnName2), 30*time.Second, time.Second).Should(Succeed()) + + By("creating two CNCs with different selectors") + createOrUpdateCNC(cncName1, label1, nil) + createOrUpdateCNC(cncName2, label2, nil) + + By("verifying both CNCs have their networks") + verifyCNCHasBothAnnotations(cncName1) + verifyCNCSubnetAnnotationNetworkCount(cncName1, 1) + verifyCNCHasBothAnnotations(cncName2) + verifyCNCSubnetAnnotationNetworkCount(cncName2, 1) + + By("deleting first CNC") + deleteCNC(cncName1) + + By("verifying second CNC is unaffected") + verifyCNCHasBothAnnotations(cncName2) + verifyCNCSubnetAnnotationNetworkCount(cncName2, 1) + verifyCNCSubnetAnnotationContent(cncName2, []string{"Layer2"}) + }) + }) + + // =========================================== + // Group 9: CNC Lifecycle - CNC deletion and recreation + // =========================================== + Context("CNC lifecycle", func() { + It("CNC deletion and recreation - tunnel ID is allocated after recreate", func() { + cncName := generateCNCName() + cncLabel := map[string]string{"test-cnc-lifecycle": "true"} + + ns := createUDNNamespace("test-cnc-lifecycle-ns", nil) + cudnName := fmt.Sprintf("cnc-lifecycle-cudn-%s", rand.String(5)) + + DeferCleanup(func() { + deleteCNC(cncName) + deleteCUDN(cudnName) + cs.CoreV1().Namespaces().Delete(context.Background(), ns.Name, metav1.DeleteOptions{}) + }) + + By("creating a CUDN") + createLayer3PrimaryCUDN(cudnName, cncLabel, ns.Name) + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, cudnName), 30*time.Second, time.Second).Should(Succeed()) + + By("creating CNC") + createOrUpdateCNC(cncName, cncLabel, nil) + + By("verifying CNC has network and tunnel ID") + verifyCNCHasBothAnnotations(cncName) + verifyCNCSubnetAnnotationNetworkCount(cncName, 1) + originalTunnelID := getCNCTunnelID(cncName) + Expect(originalTunnelID).NotTo(BeEmpty()) + + By("deleting CNC") + deleteCNC(cncName) + + By("verifying CNC is gone") + Eventually(func() bool { + _, err := getCNCAnnotations(cncName) + return err != nil + }, 30*time.Second, time.Second).Should(BeTrue()) + + By("recreating CNC with same name") + createOrUpdateCNC(cncName, cncLabel, nil) + + By("verifying CNC has network again") + verifyCNCHasBothAnnotations(cncName) + verifyCNCSubnetAnnotationNetworkCount(cncName, 1) + + By("verifying tunnel ID is newly allocated after CNC recreation") + newTunnelID := getCNCTunnelID(cncName) + Expect(newTunnelID).NotTo(BeEmpty()) + Expect(newTunnelID).NotTo(Equal(originalTunnelID)) + }) + + It("tunnel ID is stable across CNC spec updates", func() { + cncName := generateCNCName() + label1 := map[string]string{"test-tunnel-stable-1": "true"} + label2 := map[string]string{"test-tunnel-stable-2": "true"} + + ns1 := createUDNNamespace("test-tunnel-stable-ns1", nil) + ns2 := createUDNNamespace("test-tunnel-stable-ns2", nil) + cudnName1 := fmt.Sprintf("tunnel-stable-cudn1-%s", rand.String(5)) + cudnName2 := 
fmt.Sprintf("tunnel-stable-cudn2-%s", rand.String(5)) + + DeferCleanup(func() { + deleteCNC(cncName) + deleteCUDN(cudnName1) + deleteCUDN(cudnName2) + cs.CoreV1().Namespaces().Delete(context.Background(), ns1.Name, metav1.DeleteOptions{}) + cs.CoreV1().Namespaces().Delete(context.Background(), ns2.Name, metav1.DeleteOptions{}) + }) + + By("creating 2 CUDNs with different labels") + createLayer3PrimaryCUDN(cudnName1, label1, ns1.Name) + createLayer2PrimaryCUDN(cudnName2, label2, ns2.Name) + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, cudnName1), 30*time.Second, time.Second).Should(Succeed()) + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, cudnName2), 30*time.Second, time.Second).Should(Succeed()) + + By("creating CNC matching first CUDN") + createOrUpdateCNC(cncName, label1, nil) + + By("verifying CNC has network and recording tunnel ID") + verifyCNCHasBothAnnotations(cncName) + verifyCNCSubnetAnnotationNetworkCount(cncName, 1) + originalTunnelID := getCNCTunnelID(cncName) + + By("updating CNC to match second CUDN instead") + createOrUpdateCNC(cncName, label2, nil) + + By("verifying CNC now has second network") + verifyCNCSubnetAnnotationNetworkCount(cncName, 1) + verifyCNCSubnetAnnotationContent(cncName, []string{"Layer2"}) + + By("verifying tunnel ID is unchanged") + newTunnelID := getCNCTunnelID(cncName) + Expect(newTunnelID).To(Equal(originalTunnelID), + "tunnel ID should be stable across spec updates") + + By("updating CNC to match both CUDNs") + // Add label1 to second CUDN so we can match both + _, err := e2ekubectl.RunKubectl("", "label", "clusteruserdefinednetwork", cudnName2, "test-tunnel-stable-1=true") + Expect(err).NotTo(HaveOccurred()) + createOrUpdateCNC(cncName, label1, nil) + + By("verifying CNC now has both networks") + verifyCNCSubnetAnnotationNetworkCount(cncName, 2) + + By("verifying tunnel ID is still unchanged") + finalTunnelID := getCNCTunnelID(cncName) + Expect(finalTunnelID).To(Equal(originalTunnelID), + "tunnel ID should remain stable") + }) + + }) + + // =========================================== + // Group 10: Full Lifecycle Workflow (1 comprehensive test) + // =========================================== + Context("full lifecycle workflow", func() { + It("comprehensive workflow - create, add, update, remove networks through CNC lifecycle", func() { + cncName := generateCNCName() + cudnLabel := map[string]string{"test-lifecycle": "true"} + pudnLabel := map[string]string{"test-lifecycle": "true"} + var expectedTopologies []string + + // Create namespaces + cudnNs1 := createUDNNamespace("lifecycle-cudn-ns1", nil) + cudnNs2 := createUDNNamespace("lifecycle-cudn-ns2", nil) + udnNs1 := createUDNNamespace("lifecycle-udn-ns1", pudnLabel) + udnNs2 := createUDNNamespace("lifecycle-udn-ns2", pudnLabel) + + cudnName1 := fmt.Sprintf("lifecycle-cudn1-%s", rand.String(5)) + cudnName2 := fmt.Sprintf("lifecycle-cudn2-%s", rand.String(5)) + udnName1 := "udn1" + udnName2 := "udn2" + + DeferCleanup(func() { + deleteCNC(cncName) + deleteCUDN(cudnName1) + deleteCUDN(cudnName2) + deleteUDN(udnNs1.Name, udnName1) + deleteUDN(udnNs2.Name, udnName2) + for _, ns := range []*corev1.Namespace{cudnNs1, cudnNs2, udnNs1, udnNs2} { + cs.CoreV1().Namespaces().Delete(context.Background(), ns.Name, metav1.DeleteOptions{}) + } + }) + + // Phase 1: Create CNC with no matching networks + By("Phase 1: Creating CNC with no matching networks yet") + createOrUpdateCNC(cncName, cudnLabel, pudnLabel) + verifyCNCHasOnlyTunnelIDAnnotation(cncName) + originalTunnelID := 
getCNCTunnelID(cncName) + + // Phase 2: Create first L3 CUDN - count goes to 1 + By("Phase 2: Creating first L3 CUDN") + createLayer3PrimaryCUDN(cudnName1, cudnLabel, cudnNs1.Name) + expectedTopologies = append(expectedTopologies, "Layer3") + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, cudnName1), 30*time.Second, time.Second).Should(Succeed()) + verifyCNCSubnetAnnotationNetworkCount(cncName, 1) + verifyCNCSubnetAnnotationContent(cncName, expectedTopologies) + + // Phase 3: Create first L2 UDN - count goes to 2 + By("Phase 3: Creating first L2 UDN") + createLayer2PrimaryUDN(udnNs1.Name, udnName1) + expectedTopologies = append(expectedTopologies, "Layer2") + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, udnNs1.Name, udnName1), 30*time.Second, time.Second).Should(Succeed()) + verifyCNCSubnetAnnotationNetworkCount(cncName, 2) + verifyCNCSubnetAnnotationContent(cncName, expectedTopologies) + + // Phase 4: Create second L2 CUDN - count goes to 3 + By("Phase 4: Creating second L2 CUDN") + createLayer2PrimaryCUDN(cudnName2, cudnLabel, cudnNs2.Name) + expectedTopologies = append(expectedTopologies, "Layer2") + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, cudnName2), 30*time.Second, time.Second).Should(Succeed()) + verifyCNCSubnetAnnotationNetworkCount(cncName, 3) + verifyCNCSubnetAnnotationContent(cncName, expectedTopologies) + + // Phase 5: Create second L3 UDN - count goes to 4 + By("Phase 5: Creating second L3 UDN") + createLayer3PrimaryUDN(udnNs2.Name, udnName2) + expectedTopologies = append(expectedTopologies, "Layer3") + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, udnNs2.Name, udnName2), 30*time.Second, time.Second).Should(Succeed()) + verifyCNCSubnetAnnotationNetworkCount(cncName, 4) + verifyCNCSubnetAnnotationContent(cncName, expectedTopologies) + + // Verify tunnel ID is stable + By("Verifying tunnel ID unchanged after adding networks") + Expect(getCNCTunnelID(cncName)).To(Equal(originalTunnelID)) + + // Phase 6: Remove PUDN selector - count goes to 2 (only CUDNs remain) + By("Phase 6: Removing PUDN selector from CNC") + createOrUpdateCNC(cncName, cudnLabel, nil) + verifyCNCSubnetAnnotationNetworkCount(cncName, 2) + verifyCNCSubnetAnnotationContent(cncName, []string{"Layer3", "Layer2"}) // cudnName1 is L3, cudnName2 is L2 + + // Verify tunnel ID is stable + By("Verifying tunnel ID unchanged after selector update") + Expect(getCNCTunnelID(cncName)).To(Equal(originalTunnelID)) + + // Phase 7: Delete one CUDN - count goes to 1 + By("Phase 7: Deleting first CUDN") + deleteCUDN(cudnName1) + verifyCNCSubnetAnnotationNetworkCount(cncName, 1) + verifyCNCSubnetAnnotationContent(cncName, []string{"Layer2"}) // only cudnName2 remains + + // Phase 8: Add PUDN selector back - count goes to 3 (1 CUDN + 2 UDNs) + By("Phase 8: Adding PUDN selector back to CNC") + createOrUpdateCNC(cncName, cudnLabel, pudnLabel) + verifyCNCSubnetAnnotationNetworkCount(cncName, 3) + verifyCNCSubnetAnnotationContent(cncName, []string{"Layer2", "Layer2", "Layer3"}) // cudnName2(L2), udn1(L2), udn2(L3) + + // Verify tunnel ID is stable + By("Verifying tunnel ID unchanged after adding selector back") + Expect(getCNCTunnelID(cncName)).To(Equal(originalTunnelID)) + + // Phase 9: Remove label from namespace - UDN1 no longer matches - count goes to 2 + By("Phase 9: Removing label from first UDN namespace") + _, err := cs.CoreV1().Namespaces().Patch(context.Background(), udnNs1.Name, + types.MergePatchType, + []byte(`{"metadata":{"labels":{"test-lifecycle":null}}}`), + 
metav1.PatchOptions{}) + Expect(err).NotTo(HaveOccurred()) + verifyCNCSubnetAnnotationNetworkCount(cncName, 2) + verifyCNCSubnetAnnotationContent(cncName, []string{"Layer2", "Layer3"}) // cudnName2(L2), udn2(L3) + + // Phase 10: Delete remaining networks - count goes to 0 + By("Phase 10: Deleting remaining networks") + deleteCUDN(cudnName2) + deleteUDN(udnNs2.Name, udnName2) + verifyCNCSubnetAnnotationNetworkCount(cncName, 0) + verifyCNCSubnetAnnotationContent(cncName, []string{}) + + // Verify tunnel ID is stable even with no networks + By("Verifying tunnel ID unchanged even with no networks") + Expect(getCNCTunnelID(cncName)).To(Equal(originalTunnelID)) + + // Final verification: CNC still exists with only tunnel ID + By("Final: Verifying CNC has only tunnel ID annotation") + verifyCNCHasOnlyTunnelIDAnnotation(cncName) + + By("Deleting CNC") + deleteCNC(cncName) + }) + }) +}) diff --git a/test/e2e/cluster_network_connect_validations.go b/test/e2e/cluster_network_connect_validations.go index 4278381813..bb730a7d7f 100644 --- a/test/e2e/cluster_network_connect_validations.go +++ b/test/e2e/cluster_network_connect_validations.go @@ -11,7 +11,7 @@ import ( testscenariocnc "github.com/ovn-org/ovn-kubernetes/test/e2e/testscenario/clusternetworkconnect" ) -var _ = Describe("ClusterNetworkConnect: API validations", feature.NetworkSegmentation, func() { +var _ = Describe("ClusterNetworkConnect: API validations", feature.NetworkConnect, func() { DescribeTable("api-server should reject invalid ClusterNetworkConnect CRs", func(scenarios []testscenario.ValidateCRScenario) { DeferCleanup(func() { diff --git a/test/e2e/deploymentconfig/config.go b/test/e2e/deploymentconfig/config.go index 8675335453..db3a6dc8e4 100644 --- a/test/e2e/deploymentconfig/config.go +++ b/test/e2e/deploymentconfig/config.go @@ -1,30 +1,20 @@ package deploymentconfig import ( - "fmt" - "github.com/ovn-org/ovn-kubernetes/test/e2e/deploymentconfig/api" - "github.com/ovn-org/ovn-kubernetes/test/e2e/deploymentconfig/configs/kind" - - "k8s.io/client-go/rest" ) -var deployment api.DeploymentConfig +var deploymentConfig api.DeploymentConfig -func Set(_ *rest.Config) error { - // upstream currently uses KinD as its preferred platform infra, so if we detect KinD, its upstream - if kind.IsKind() { - deployment = kind.New() - } - if deployment == nil { - return fmt.Errorf("failed to determine the deployment config") - } - return nil +// Set deployment config. +func Set(deployment api.DeploymentConfig) { + deploymentConfig = deployment } +// Get deployment config. 
 func Get() api.DeploymentConfig {
-	if deployment == nil {
+	if deploymentConfig == nil {
 		panic("deployment config type not set")
 	}
-	return deployment
+	return deploymentConfig
 }
diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go
index c6f32f5794..205650a85d 100644
--- a/test/e2e/e2e_suite_test.go
+++ b/test/e2e/e2e_suite_test.go
@@ -15,6 +15,8 @@ import (
 	"github.com/ovn-org/ovn-kubernetes/test/e2e/ipalloc"
 	"github.com/ovn-org/ovn-kubernetes/test/e2e/label"
 
+	deploymentkind "github.com/ovn-org/ovn-kubernetes/test/e2e/deploymentconfig/configs/kind"
+	infrakind "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider/providers/kind"
 	clientset "k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/tools/clientcmd"
 	"k8s.io/klog"
@@ -42,10 +44,6 @@ var _ = ginkgo.BeforeSuite(func() {
 	framework.ExpectNoError(err)
 	config, err := framework.LoadConfig()
 	framework.ExpectNoError(err)
-	err = infraprovider.Set(config)
-	framework.ExpectNoError(err, "must configure infrastructure provider")
-	err = deploymentconfig.Set(config)
-	framework.ExpectNoError(err, "must detect deployment configuration")
 	client, err := clientset.NewForConfig(config)
 	framework.ExpectNoError(err, "k8 clientset is required to list nodes")
 	err = ipalloc.InitPrimaryIPAllocator(client.CoreV1().Nodes())
@@ -57,6 +55,27 @@ func TestMain(m *testing.M) {
 	// Register test flags, then parse flags.
 	handleFlags()
 	ProcessTestContextAndSetupLogging()
+
+	// Set up the infrastructure provider and deployment config.
+	// Upstream currently uses KinD as its preferred platform infra, so TestMain
+	// is expected to run only there.
+	if !infrakind.IsProvider() {
+		klog.Fatal("Cluster provider must be KinD type")
+	}
+	infrastructure := infrakind.New()
+	if infrastructure == nil {
+		klog.Fatal("Failed to determine the infrastructure provider")
+	}
+	infraprovider.Set(infrastructure)
+	if !deploymentkind.IsKind() {
+		klog.Fatal("Deployment Config must be KinD type")
+	}
+	deployment := deploymentkind.New()
+	if deployment == nil {
+		klog.Fatal("Failed to determine the deployment config")
+	}
+	deploymentconfig.Set(deployment)
+
 	os.Exit(m.Run())
 }
diff --git a/test/e2e/feature/features.go b/test/e2e/feature/features.go
index a11c8bbe8e..0a995c1b37 100644
--- a/test/e2e/feature/features.go
+++ b/test/e2e/feature/features.go
@@ -26,6 +26,7 @@ var (
 	RouteAdvertisements = New("RouteAdvertisements")
 	Unidle = New("Unidle")
 	NetworkQos = New("NetworkQos")
+	NetworkConnect = New("NetworkConnect")
 )
 
 func New(name string) ginkgo.Labels {
diff --git a/test/e2e/infraprovider/api/api.go b/test/e2e/infraprovider/api/api.go
index 1d2d3466fb..2a38ef6595 100644
--- a/test/e2e/infraprovider/api/api.go
+++ b/test/e2e/infraprovider/api/api.go
@@ -37,6 +37,14 @@ type Provider interface {
 	// tests utilizing conflicting ports. It also allows infra provider implementations to set Nodes
 	// allowed port range and therefore comply with cloud provider firewall rules.
 	GetK8HostPort() uint16 // supported K8 host ports
+
+	// ShutdownNode shuts down the specified node
+	ShutdownNode(nodeName string) error
+	// StartNode starts the specified node
+	StartNode(nodeName string) error
+
+	// Get platform specific timeout values
+	GetDefaultTimeoutContext() *framework.TimeoutContext
 }
 
 // Underlay represents the configuration for an underlay network.
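A minimal sketch of how a test helper might consume the node-lifecycle hooks added to the Provider interface above. The helper name restartNode and its package placement are hypothetical and not part of this patch; infraprovider.Get, ShutdownNode, and StartNode are the APIs introduced in this change.

package e2e

import (
	"github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider"
)

// restartNode stops and then starts a cluster node through whichever
// infrastructure provider was registered in TestMain (KinD upstream).
// Both calls are idempotent in the KinD implementation: stopping an
// already-stopped container or starting a running one is treated as success.
func restartNode(nodeName string) error {
	provider := infraprovider.Get()
	if err := provider.ShutdownNode(nodeName); err != nil {
		return err
	}
	return provider.StartNode(nodeName)
}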
diff --git a/test/e2e/infraprovider/provider.go b/test/e2e/infraprovider/provider.go index 5f53b35cd5..f33399ba90 100644 --- a/test/e2e/infraprovider/provider.go +++ b/test/e2e/infraprovider/provider.go @@ -1,11 +1,7 @@ package infraprovider import ( - "fmt" "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider/api" - "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider/providers/kind" - - "k8s.io/client-go/rest" ) type Name string @@ -14,24 +10,17 @@ func (n Name) String() string { return string(n) } -var provider api.Provider +var infraProvider api.Provider -// Set detects which infrastructure provider. Arg config is not needed for KinD provider but downstream implementations -// will require access to the kapi to infer what platform k8 is running on. -func Set(_ *rest.Config) error { - // detect if the provider is KinD - if kind.IsProvider() { - provider = kind.New() - } - if provider == nil { - return fmt.Errorf("failed to determine the infrastructure provider") - } - return nil +// Set infrastructure provider. +func Set(provider api.Provider) { + infraProvider = provider } +// Get infrastructure provider. func Get() api.Provider { - if provider == nil { - panic("provider not set") + if infraProvider == nil { + panic("infra provider not set") } - return provider + return infraProvider } diff --git a/test/e2e/infraprovider/providers/kind/kind.go b/test/e2e/infraprovider/providers/kind/kind.go index 8c068c7411..cbf0a05f85 100644 --- a/test/e2e/infraprovider/providers/kind/kind.go +++ b/test/e2e/infraprovider/providers/kind/kind.go @@ -28,11 +28,13 @@ import ( func IsProvider() bool { _, err := exec.LookPath("kubectl") if err != nil { - panic("kubectl must be installed") + framework.Logf("kubectl is not installed: %v", err) + return false } currentCtx, err := exec.Command("kubectl", "config", "current-context").CombinedOutput() if err != nil { - panic(fmt.Sprintf("unable to get current cluster context: %v", err)) + framework.Logf("unable to get current cluster context: %v", err) + return false } if strings.Contains(string(currentCtx), "kind-ovn") { return true @@ -74,11 +76,15 @@ func (k *kind) GetK8NodeNetworkInterface(container string, network api.Network) } func (k *kind) ExecK8NodeCommand(nodeName string, cmd []string) (string, error) { - if !doesContainerNameExist(nodeName) { + exists, err := doesContainerNameExist(nodeName) + if err != nil { + return "", fmt.Errorf("failed to check if container %q exists: %w", nodeName, err) + } + if !exists { return "", fmt.Errorf("cannot exec into container %q because it doesn't exist: %w", nodeName, api.NotFound) } if len(cmd) == 0 { - panic("ExecK8NodeCommand(): insufficient command arguments") + return "", fmt.Errorf("ExecK8NodeCommand(): insufficient command arguments") } cmdArgs := append([]string{"exec", nodeName}, cmd...) stdOut, err := exec.Command(containerengine.Get().String(), cmdArgs...).CombinedOutput() @@ -89,7 +95,11 @@ func (k *kind) ExecK8NodeCommand(nodeName string, cmd []string) (string, error) } func (k *kind) ExecExternalContainerCommand(container api.ExternalContainer, cmd []string) (string, error) { - if !doesContainerNameExist(container.Name) { + exists, err := doesContainerNameExist(container.Name) + if err != nil { + return "", fmt.Errorf("failed to check if container %q exists: %w", container.Name, err) + } + if !exists { return "", fmt.Errorf("cannot exec into container %q because it doesn't exist: %w", container.Name, api.NotFound) } cmdArgs := append([]string{"exec", container.Name}, cmd...) 
@@ -101,7 +111,11 @@ func (k *kind) ExecExternalContainerCommand(container api.ExternalContainer, cmd } func (k *kind) GetExternalContainerLogs(container api.ExternalContainer) (string, error) { - if !doesContainerNameExist(container.Name) { + exists, err := doesContainerNameExist(container.Name) + if err != nil { + return "", fmt.Errorf("failed to check if container %q exists: %w", container.Name, err) + } + if !exists { return "", fmt.Errorf("container %q doesn't exist, therefore no logs can be retrieved: %w", container.Name, api.NotFound) } stdOut, err := exec.Command(containerengine.Get().String(), "logs", container.Name).CombinedOutput() @@ -119,6 +133,72 @@ func (k *kind) GetK8HostPort() uint16 { return k.hostPort.Allocate() } +func (k *kind) GetDefaultTimeoutContext() *framework.TimeoutContext { + return framework.NewTimeoutContext() +} + +// getContainerState returns the state of a container by name +// Returns empty string if container doesn't exist +func getContainerState(containerName string) (string, error) { + stdOut, err := exec.Command(containerengine.Get().String(), "ps", "-a", "-f", fmt.Sprintf("name=^%s$", containerName), "--format", "{{.State}}").CombinedOutput() + if err != nil { + return "", fmt.Errorf("failed to check container state for %s: %s (%s)", containerName, err, stdOut) + } + + state := strings.TrimSpace(string(stdOut)) + return state, nil +} + +func (k *kind) ShutdownNode(nodeName string) error { + state, err := getContainerState(nodeName) + if err != nil { + return err + } + + if state == "" { + return fmt.Errorf("cannot shutdown node %q because it doesn't exist: %w", nodeName, api.NotFound) + } + + // If container is already stopped/exited, consider it success + if state == "exited" || state == "stopped" { + framework.Logf("Node %s is already stopped (state: %s)", nodeName, state) + return nil + } + + framework.Logf("Shutting down node %s (current state: %s)", nodeName, state) + stdOut, err := exec.Command(containerengine.Get().String(), "stop", nodeName).CombinedOutput() + if err != nil { + return fmt.Errorf("failed to shutdown node %s: %s (%s)", nodeName, err, stdOut) + } + framework.Logf("Successfully shut down node %s", nodeName) + return nil +} + +func (k *kind) StartNode(nodeName string) error { + state, err := getContainerState(nodeName) + if err != nil { + return err + } + + if state == "" { + return fmt.Errorf("cannot start node %q because it doesn't exist: %w", nodeName, api.NotFound) + } + + // If container is already running, consider it success + if state == "running" || state == "up" { + framework.Logf("Node %s is already running (state: %s)", nodeName, state) + return nil + } + + framework.Logf("Starting node %s (current state: %s)", nodeName, state) + stdOut, err := exec.Command(containerengine.Get().String(), "start", nodeName).CombinedOutput() + if err != nil { + return fmt.Errorf("failed to start node %s: %s (%s)", nodeName, err, stdOut) + } + framework.Logf("Successfully started node %s", nodeName) + return nil +} + func (k *kind) NewTestContext() api.Context { ck := &contextKind{Mutex: sync.Mutex{}} ginkgo.DeferCleanup(ck.CleanUp) @@ -143,7 +223,11 @@ func (c *contextKind) createExternalContainer(container api.ExternalContainer) ( if valid, err := container.IsValidPreCreateContainer(); !valid { return container, err } - if doesContainerNameExist(container.Name) { + exists, err := doesContainerNameExist(container.Name) + if err != nil { + return container, fmt.Errorf("failed to check if container %s exists: %w", container.Name, err) + } 
+ if exists { return container, fmt.Errorf("container %s already exists", container.Name) } cmd := []string{"run", "-itd", "--privileged", "--name", container.Name, "--network", container.Network.Name(), "--hostname", container.Name} @@ -199,7 +283,11 @@ func (c *contextKind) DeleteExternalContainer(container api.ExternalContainer) e func (c *contextKind) deleteExternalContainer(container api.ExternalContainer) error { // check if it is present before deleting - if !doesContainerNameExist(container.Name) { + exists, err := doesContainerNameExist(container.Name) + if err != nil { + return fmt.Errorf("failed to check if container %s exists: %w", container.Name, err) + } + if !exists { return nil } stdOut, err := exec.Command(containerengine.Get().String(), "rm", "-f", container.Name).CombinedOutput() @@ -230,7 +318,11 @@ func (c *contextKind) CreateNetwork(name string, subnets ...string) (api.Network func (c *contextKind) createNetwork(name string, subnets ...string) (api.Network, error) { network := containerEngineNetwork{name, nil} - if doesNetworkExist(name) { + exists, err := doesNetworkExist(name) + if err != nil { + return network, fmt.Errorf("failed to check if network %s exists: %w", name, err) + } + if exists { attachedContainers, err := getContainerAttachedToNetwork(name) if err != nil { framework.Logf("failed to get containers attached to network %s: %v", name, err) @@ -267,7 +359,11 @@ func (c *contextKind) AttachNetwork(network api.Network, container string) (api. } func (c *contextKind) attachNetwork(network api.Network, container string) (api.NetworkInterface, error) { - if !doesNetworkExist(network.Name()) { + exists, err := doesNetworkExist(network.Name()) + if err != nil { + return api.NetworkInterface{}, fmt.Errorf("failed to check if network %s exists: %w", network.Name(), err) + } + if !exists { return api.NetworkInterface{}, fmt.Errorf("network %s doesn't exist", network.Name()) } if isNetworkAttachedToContainer(network.Name(), container) { @@ -289,7 +385,11 @@ func (c *contextKind) DetachNetwork(network api.Network, container string) error } func (c *contextKind) detachNetwork(network api.Network, container string) error { - if !doesNetworkExist(network.Name()) { + exists, err := doesNetworkExist(network.Name()) + if err != nil { + return fmt.Errorf("failed to check if network %s exists: %w", network.Name(), err) + } + if !exists { return nil } if !isNetworkAttachedToContainer(network.Name(), container) { @@ -310,7 +410,12 @@ func (c *contextKind) DeleteNetwork(network api.Network) error { func (c *contextKind) deleteNetwork(network api.Network) error { return wait.PollImmediate(1*time.Second, 10*time.Second, func() (done bool, err error) { - if !doesNetworkExist(network.Name()) { + exists, err := doesNetworkExist(network.Name()) + if err != nil { + framework.Logf("failed to check if network %s exists: %v", network.Name(), err) + return false, nil + } + if !exists { return true, nil } // ensure all containers are disconnected from the network and if any are found, disconnect it. 
@@ -517,34 +622,35 @@ func isNetworkAttachedToContainer(networkName, containerName string) bool { return true } -func doesContainerNameExist(name string) bool { - // check if it is present before retrieving logs - stdOut, err := exec.Command(containerengine.Get().String(), "ps", "-f", fmt.Sprintf("name=^%s$", name), "-q").CombinedOutput() +func doesContainerNameExist(name string) (bool, error) { + state, err := getContainerState(name) if err != nil { - panic(fmt.Sprintf("failed to check if external container (%s) exists: %v (%s)", name, err, stdOut)) - } - if string(stdOut) == "" { - return false + return false, err } - return true + // Empty state means container doesn't exist + return state != "", nil } -func doesNetworkExist(networkName string) bool { +func doesNetworkExist(networkName string) (bool, error) { dataBytes, err := exec.Command(containerengine.Get().String(), "network", "ls", "--format", nameFormat).CombinedOutput() if err != nil { - panic(err.Error()) + return false, fmt.Errorf("failed to list networks: %w", err) } for _, existingNetworkName := range strings.Split(strings.Trim(string(dataBytes), "\n"), "\n") { if existingNetworkName == networkName { - return true + return true, nil } } - return false + return false, nil } func getNetwork(networkName string) (containerEngineNetwork, error) { n := containerEngineNetwork{name: networkName} - if !doesNetworkExist(networkName) { + exists, err := doesNetworkExist(networkName) + if err != nil { + return n, fmt.Errorf("failed to check if network %s exists: %w", networkName, err) + } + if !exists { return n, api.NotFound } configs := make([]containerEngineNetworkConfig, 0, 1) @@ -590,10 +696,18 @@ func getContainerAttachedToNetwork(networkName string) ([]string, error) { func getNetworkInterface(containerName, networkName string) (api.NetworkInterface, error) { var ni = api.NetworkInterface{} - if !doesNetworkExist(networkName) { + exists, err := doesNetworkExist(networkName) + if err != nil { + return ni, fmt.Errorf("failed to check if network %q exists: %w", networkName, err) + } + if !exists { return ni, fmt.Errorf("failed to find network %q: %w", networkName, api.NotFound) } - if !doesContainerNameExist(containerName) { + exists, err = doesContainerNameExist(containerName) + if err != nil { + return ni, fmt.Errorf("failed to check if container %q exists: %w", containerName, err) + } + if !exists { return ni, fmt.Errorf("failed to find container %q: %w", containerName, api.NotFound) } getContainerNetwork := func(inspectTemplate string) (string, error) { @@ -611,19 +725,23 @@ func getNetworkInterface(containerName, networkName string) (api.NetworkInterfac return valueStr, nil } - getIPFamilyFlagForIPRoute2 := func(ipStr string) string { + getIPFamilyFlagForIPRoute2 := func(ipStr string) (string, error) { ip := net.ParseIP(ipStr) if ip == nil { - panic("invalid IP") + return "", fmt.Errorf("invalid IP address: %s", ipStr) } if utilnet.IsIPv6(ip) { - return "-6" + return "-6", nil } - return "-4" + return "-4", nil } getInterfaceNameUsingIP := func(ip string) (string, error) { - allInfAddrBytes, err := exec.Command(containerengine.Get().String(), "exec", "-i", containerName, "ip", "-br", getIPFamilyFlagForIPRoute2(ip), "a", "sh").CombinedOutput() + ipFlag, err := getIPFamilyFlagForIPRoute2(ip) + if err != nil { + return "", fmt.Errorf("failed to get IP family flag for %s: %w", ip, err) + } + allInfAddrBytes, err := exec.Command(containerengine.Get().String(), "exec", "-i", containerName, "ip", "-br", ipFlag, "a", 
"sh").CombinedOutput() if err != nil { return "", fmt.Errorf("failed to find interface with IP %s on container %s with command 'ip -br a sh': err %v, out: %s", ip, containerName, err, allInfAddrBytes) @@ -661,7 +779,6 @@ func getNetworkInterface(containerName, networkName string) (api.NetworkInterfac return infName, nil // second value is veth in 'host' netns } - var err error ni.IPv4Gateway, err = getContainerNetwork(inspectNetworkIPv4GWKeyStr) if err != nil { // may not be available diff --git a/test/e2e/kubevirt.go b/test/e2e/kubevirt.go index e4dd99f170..2ebdcd0d0d 100644 --- a/test/e2e/kubevirt.go +++ b/test/e2e/kubevirt.go @@ -32,6 +32,7 @@ import ( corev1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" @@ -2408,7 +2409,60 @@ chpasswd: { expire: False } ) }) + getIPAMClaimName := func(vmName, netName string) string { + return fmt.Sprintf("%s.%s", vmName, netName) + } + + verifyIPAMClaimStatusSuccess := func(ipamClaimName string) { + Eventually(func(g Gomega) { + ipamClaim := &ipamclaimsv1alpha1.IPAMClaim{} + g.Expect(crClient.Get(context.Background(), crclient.ObjectKey{ + Namespace: namespace, + Name: ipamClaimName, + }, ipamClaim)).To(Succeed(), "Should get IPAMClaim") + + g.Expect(ipamClaim.Status.OwnerPod).NotTo(BeNil(), "OwnerPod should be set") + g.Expect(ipamClaim.Status.OwnerPod.Name).To(HavePrefix("virt-launcher-"), "OwnerPod should be the virt-launcher pod") + + g.Expect(ipamClaim.Status.Conditions).NotTo(BeEmpty(), "Conditions should be set") + condition := meta.FindStatusCondition(ipamClaim.Status.Conditions, "IPsAllocated") + g.Expect(condition).NotTo(BeNil(), "IPsAllocated condition should exist") + g.Expect(condition.Status).To(Equal(metav1.ConditionTrue), "Condition status should be True") + g.Expect(condition.Reason).To(Equal("SuccessfulAllocation"), "Condition reason should be SuccessfulAllocation") + + g.Expect(ipamClaim.Status.IPs).NotTo(BeEmpty(), "IPs should be set on successful allocation") + }). + WithTimeout(30*time.Second). + WithPolling(2*time.Second). + Should(Succeed(), fmt.Sprintf("IPAMClaim %s should have expected successful status", ipamClaimName)) + } + + verifyIPAMClaimStatusFailure := func(ipamClaimName string, expectedReason string) { + Eventually(func(g Gomega) { + ipamClaim := &ipamclaimsv1alpha1.IPAMClaim{} + g.Expect(crClient.Get(context.Background(), crclient.ObjectKey{ + Namespace: namespace, + Name: ipamClaimName, + }, ipamClaim)).To(Succeed(), "Should get IPAMClaim") + + g.Expect(ipamClaim.Status.OwnerPod).NotTo(BeNil(), "OwnerPod should be set") + g.Expect(ipamClaim.Status.OwnerPod.Name).To(HavePrefix("virt-launcher-"), "OwnerPod should be the virt-launcher pod") + + g.Expect(ipamClaim.Status.Conditions).NotTo(BeEmpty(), "Conditions should be set") + condition := meta.FindStatusCondition(ipamClaim.Status.Conditions, "IPsAllocated") + g.Expect(condition).NotTo(BeNil(), "IPsAllocated condition should exist") + g.Expect(condition.Status).To(Equal(metav1.ConditionFalse), "Condition status should be False") + g.Expect(condition.Reason).To(Equal(expectedReason), "Condition reason should match") + + g.Expect(ipamClaim.Status.IPs).To(BeEmpty(), "IPs should not be set on failed allocation") + }). + WithTimeout(30*time.Second). + WithPolling(2*time.Second). 
+ Should(Succeed(), fmt.Sprintf("IPAMClaim %s should have expected failure status", ipamClaimName)) + } + Context("duplicate addresses validation", func() { + const networkName = "net1" var ( cudn *udnv1.ClusterUserDefinedNetwork duplicateIPv4 = "10.128.0.200" // Static IP that will be used by both VMs @@ -2432,7 +2486,7 @@ chpasswd: { expire: False } namespace = fr.Namespace.Name dualCIDRs := filterDualStackCIDRs(fr.ClientSet, []udnv1.CIDR{udnv1.CIDR(cidrIPv4), udnv1.CIDR(cidrIPv6)}) - cudn, _ = kubevirt.GenerateCUDN(namespace, "net1", udnv1.NetworkTopologyLayer2, udnv1.NetworkRolePrimary, dualCIDRs) + cudn, _ = kubevirt.GenerateCUDN(namespace, networkName, udnv1.NetworkTopologyLayer2, udnv1.NetworkRolePrimary, dualCIDRs) createCUDN(cudn) }) @@ -2459,6 +2513,9 @@ chpasswd: { expire: False } createVirtualMachine(vm1) waitForVMReadinessAndVerifyIPs(vm1.Name, staticIPs) + By("Verifying first VM IPAMClaim has successful status") + verifyIPAMClaimStatusSuccess(getIPAMClaimName(vm1.Name, networkName)) + By("Creating second VM with duplicate static IP - should fail") vm2 := createVMWithStaticIP("test-vm-2", staticIPs) createVirtualMachine(vm2) @@ -2466,6 +2523,9 @@ chpasswd: { expire: False } By("Verifying pod fails with duplicate IP allocation error") waitForVMPodErrorEvent(vm2.Name, "provided IP is already allocated") + By("Verifying second VM IPAMClaim has failure status with IPAddressConflict") + verifyIPAMClaimStatusFailure(getIPAMClaimName(vm2.Name, networkName), "IPAddressConflict") + By("Verifying first VM is still running normally") waitForVMReadinessAndVerifyIPs(vm1.Name, staticIPs) }) @@ -2495,6 +2555,9 @@ chpasswd: { expire: False } Expect(crClient.Get(context.Background(), crclient.ObjectKeyFromObject(vm1), vmi1)).To(Succeed()) Expect(vmi1.Status.Interfaces[0].MAC).To(Equal(testMAC), "vmi status should report the requested mac") + By("Verifying first VM IPAMClaim has successful status") + verifyIPAMClaimStatusSuccess(getIPAMClaimName(vm1.Name, networkName)) + By("Create second VM requesting the same MAC address") vmi2 := newVMIWithPrimaryIfaceMAC(testMAC) vm2 := generateVM(vmi2) @@ -2515,6 +2578,9 @@ chpasswd: { expire: False } HaveField("Message", ContainSubstring("MAC address already in use")), ))) + By("Verifying second VM IPAMClaim has failure status with MACAddressConflict") + verifyIPAMClaimStatusFailure(getIPAMClaimName(vm2.Name, networkName), "MACAddressConflict") + By("Assert second VM not running") Expect(crClient.Get(context.Background(), crclient.ObjectKeyFromObject(vm2), vmi2)).To(Succeed()) Expect(vmi2.Status.Conditions).To(ContainElement(SatisfyAll( @@ -2642,4 +2708,62 @@ chpasswd: { expire: False } Expect(actualAddresses).To(ConsistOf(staticIPs), "VM should get the requested static IPs") }) }) + + Context("ipv4 subnet exhaustion", func() { + const networkName = "net1" + var ( + cudn *udnv1.ClusterUserDefinedNetwork + cidrIPv4 = "10.130.0.0/30" // subnet with no usable IPs + cidrIPv6 = "2011:100:200::0/120" + ) + + BeforeEach(func() { + l := map[string]string{ + "e2e-framework": fr.BaseName, + RequiredUDNNamespaceLabel: "", + } + ns, err := fr.CreateNamespace(context.Background(), fr.BaseName, l) + Expect(err).NotTo(HaveOccurred()) + fr.Namespace = ns + namespace = fr.Namespace.Name + + dualCIDRs := filterDualStackCIDRs(fr.ClientSet, []udnv1.CIDR{udnv1.CIDR(cidrIPv4), udnv1.CIDR(cidrIPv6)}) + cudn, _ = kubevirt.GenerateCUDN(namespace, networkName, udnv1.NetworkTopologyLayer2, udnv1.NetworkRolePrimary, dualCIDRs) + createCUDN(cudn) + }) + + It("should fail when subnet 
is exhausted", func() { + By("Creating VM that should fail due to subnet exhaustion") + exhaustedVMName := "exhausted-vm" + vm := fedoraWithTestToolingVM( + nil, // labels + nil, // no static IP annotations + nil, // nodeSelector + kubevirtv1.NetworkSource{ + Pod: &kubevirtv1.PodNetwork{}, + }, + `#cloud-config +password: fedora +chpasswd: { expire: False } +`, + `version: 2 +ethernets: + eth0: + dhcp4: true + dhcp6: true + ipv6-address-generation: eui64`, + ) + vm.Name = exhaustedVMName + vm.Namespace = namespace + vm.Spec.Template.Spec.Domain.Devices.Interfaces[0].Bridge = nil + vm.Spec.Template.Spec.Domain.Devices.Interfaces[0].Binding = &kubevirtv1.PluginBinding{Name: "l2bridge"} + createVirtualMachine(vm) + + By("Verifying pod fails with subnet exhaustion error") + waitForVMPodErrorEvent(exhaustedVMName, "subnet address pool exhausted") + + By("Verifying VM IPAMClaim has failure status with SubnetExhausted") + verifyIPAMClaimStatusFailure(getIPAMClaimName(exhaustedVMName, networkName), "SubnetExhausted") + }) + }) }) diff --git a/test/e2e/multihoming.go b/test/e2e/multihoming.go index 5c09665f2a..16a5181570 100644 --- a/test/e2e/multihoming.go +++ b/test/e2e/multihoming.go @@ -92,7 +92,7 @@ var _ = Describe("Multi Homing", feature.MultiHoming, func() { By("creating the attachment configuration") _, err := nadClient.NetworkAttachmentDefinitions(netConfig.namespace).Create( context.Background(), - generateNAD(netConfig, f.ClientSet), + generateNetAttachDef(netConfig.namespace, netConfig.name, generateNADSpec(netConfig)), metav1.CreateOptions{}, ) Expect(err).NotTo(HaveOccurred()) diff --git a/test/e2e/network_segmentation.go b/test/e2e/network_segmentation.go index f34bdf2a42..0aefc2236c 100644 --- a/test/e2e/network_segmentation.go +++ b/test/e2e/network_segmentation.go @@ -6,6 +6,7 @@ import ( "fmt" "net" "os" + "path/filepath" "reflect" "strings" "time" @@ -37,6 +38,7 @@ import ( "k8s.io/kubernetes/test/e2e/framework" e2ekubectl "k8s.io/kubernetes/test/e2e/framework/kubectl" e2enode "k8s.io/kubernetes/test/e2e/framework/node" + e2epod "k8s.io/kubernetes/test/e2e/framework/pod" e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" utilnet "k8s.io/utils/net" "k8s.io/utils/pointer" @@ -60,13 +62,13 @@ var _ = Describe("Network Segmentation", feature.NetworkSegmentation, func() { nodeHostnameKey = "kubernetes.io/hostname" podClusterNetPort uint16 = 9000 podClusterNetDefaultPort uint16 = 8080 - userDefinedNetworkIPv4Subnet = "172.31.0.0/16" // last subnet in private range 172.16.0.0/12 (rfc1918) + userDefinedNetworkIPv4Subnet = "172.16.0.0/16" // first subnet in private range 172.16.0.0/12 (rfc1918) userDefinedNetworkIPv6Subnet = "2014:100:200::0/60" - customL2IPv4Gateway = "172.31.0.3" + customL2IPv4Gateway = "172.16.0.3" customL2IPv6Gateway = "2014:100:200::3" - customL2IPv4ReservedCIDR = "172.31.1.0/24" + customL2IPv4ReservedCIDR = "172.16.1.0/24" customL2IPv6ReservedCIDR = "2014:100:200::100/120" - customL2IPv4InfraCIDR = "172.31.0.0/30" + customL2IPv4InfraCIDR = "172.16.0.0/30" customL2IPv6InfraCIDR = "2014:100:200::/122" userDefinedNetworkName = "hogwarts" nadName = "gryffindor" @@ -719,7 +721,7 @@ var _ = Describe("Network Segmentation", feature.NetworkSegmentation, func() { "with L2 primary UDN", "layer2", 4, - "172.31.0.0/29", + "172.16.0.0/29", "2014:100:200::0/125", ), // limit the number of pods to 10 @@ -2052,16 +2054,22 @@ func networkReadyFunc(client dynamic.ResourceInterface, name string) func() erro } func createManifest(namespace, manifest string) (func(), error) { - 
path := "test-" + randString(5) + ".yaml" - if err := os.WriteFile(path, []byte(manifest), 0644); err != nil { - framework.Failf("Unable to write yaml to disk: %v", err) + tmpDir, err := os.MkdirTemp("", "udn-test") + if err != nil { + return nil, err } cleanup := func() { - if err := os.Remove(path); err != nil { - framework.Logf("Unable to remove yaml from disk: %v", err) + if err := os.RemoveAll(tmpDir); err != nil { + framework.Logf("Unable to remove udn test yaml files from disk %s: %v", tmpDir, err) } } - _, err := e2ekubectl.RunKubectl(namespace, "create", "-f", path) + + path := filepath.Join(tmpDir, "test-ovn-k-udn-"+rand.String(5)+".yaml") + if err := os.WriteFile(path, []byte(manifest), 0644); err != nil { + return cleanup, fmt.Errorf("unable to write udn yaml to disk: %w", err) + } + + _, err = e2ekubectl.RunKubectl(namespace, "create", "-f", path) if err != nil { return cleanup, err } @@ -2439,14 +2447,12 @@ func runUDNPod(cs clientset.Interface, namespace string, serverPodConfig podConf Expect(serverPod).NotTo(BeNil()) By(fmt.Sprintf("asserting the UDN pod %s reaches the `Ready` state", serverPodConfig.name)) - var updatedPod *v1.Pod - Eventually(func() v1.PodPhase { - updatedPod, err = cs.CoreV1().Pods(namespace).Get(context.Background(), serverPod.GetName(), metav1.GetOptions{}) - if err != nil { - return v1.PodFailed - } - return updatedPod.Status.Phase - }, 2*time.Minute, 6*time.Second).Should(Equal(v1.PodRunning)) + // Retrieve and use pod start timeout value from deployment config. + err = e2epod.WaitTimeoutForPodRunningInNamespace(context.Background(), cs, serverPod.GetName(), namespace, + infraprovider.Get().GetDefaultTimeoutContext().PodStart) + Expect(err).NotTo(HaveOccurred()) + updatedPod, err := cs.CoreV1().Pods(namespace).Get(context.Background(), serverPod.GetName(), metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) return updatedPod } diff --git a/test/e2e/network_segmentation_api_validations.go b/test/e2e/network_segmentation_api_validations.go index ecf0459b02..e10151d5d6 100644 --- a/test/e2e/network_segmentation_api_validations.go +++ b/test/e2e/network_segmentation_api_validations.go @@ -32,6 +32,7 @@ var _ = Describe("Network Segmentation: API validations", feature.NetworkSegment Entry("ClusterUserDefinedNetwork, localnet, invalid vlan", testscenariocudn.LocalnetInvalidVLAN), Entry("ClusterUserDefinedNetwork, layer2", testscenariocudn.Layer2CUDNInvalid), Entry("UserDefinedNetwork, layer2", testscenariocudn.Layer2UDNInvalid), + Entry("ClusterUserDefinedNetwork, no-overlay, invalid", testscenariocudn.NoOverlayInvalid), ) DescribeTable("api-server should accept valid CRs", @@ -48,6 +49,7 @@ var _ = Describe("Network Segmentation: API validations", feature.NetworkSegment Entry("ClusterUserDefinedNetwork, localnet", testscenariocudn.LocalnetValid), Entry("ClusterUserDefinedNetwork, layer2", testscenariocudn.Layer2CUDNValid), Entry("UserDefinedNetwork, layer2", testscenariocudn.Layer2UDNValid), + Entry("ClusterUserDefinedNetwork, no-overlay, valid", testscenariocudn.NoOverlayValid), ) }) diff --git a/test/e2e/network_segmentation_default_network_annotation.go b/test/e2e/network_segmentation_default_network_annotation.go index f08229131b..4e42658588 100644 --- a/test/e2e/network_segmentation_default_network_annotation.go +++ b/test/e2e/network_segmentation_default_network_annotation.go @@ -28,8 +28,9 @@ var _ = Describe("Network Segmentation: Default network multus annotation", feat f.SkipNamespaceCreation = true type testCase struct { - ips []string - 
mac string + ips []string + mac string + lifecycle udnv1.NetworkIPAMLifecycle } DescribeTable("when added with static IP and MAC to a pod belonging to primary UDN", func(tc testCase) { if !isPreConfiguredUdnAddressesEnabled() { @@ -58,7 +59,7 @@ var _ = Describe("Network Segmentation: Default network multus annotation", feat Layer2: &udnv1.Layer2Config{ Role: udnv1.NetworkRolePrimary, Subnets: filterDualStackCIDRs(f.ClientSet, []udnv1.CIDR{"103.0.0.0/16", "2014:100:200::0/60"}), - IPAM: &udnv1.IPAMConfig{Mode: udnv1.IPAMEnabled, Lifecycle: udnv1.IPAMLifecyclePersistent}, + IPAM: &udnv1.IPAMConfig{Mode: udnv1.IPAMEnabled, Lifecycle: tc.lifecycle}, }, }, } @@ -126,9 +127,14 @@ var _ = Describe("Network Segmentation: Default network multus annotation", feat Should(Equal(corev1.PodPending)) }, - Entry("should create the pod with the specified static IP and MAC address", testCase{ + Entry("should create the pod with the specified static IP and MAC address with persistent IPAM", testCase{ ips: []string{"103.0.0.3/16", "2014:100:200::3/60"}, mac: "02:A1:B2:C3:D4:E5", + lifecycle: udnv1.IPAMLifecyclePersistent, + }), + Entry("should create the pod with the specified static IP and MAC address without persistent IPAM enabled", testCase{ + ips: []string{"103.0.0.3/16", "2014:100:200::3/60"}, + mac: "02:B1:C2:D3:E4:F5", }), ) diff --git a/test/e2e/network_segmentation_endpointslices_mirror.go b/test/e2e/network_segmentation_endpointslices_mirror.go index 83795a9afa..afd1d735f7 100644 --- a/test/e2e/network_segmentation_endpointslices_mirror.go +++ b/test/e2e/network_segmentation_endpointslices_mirror.go @@ -28,7 +28,7 @@ var _ = Describe("Network Segmentation EndpointSlices mirroring", feature.Networ f.SkipNamespaceCreation = true Context("a user defined primary network", func() { const ( - userDefinedNetworkIPv4Subnet = "172.31.0.0/16" // last subnet in private range 172.16.0.0/12 (rfc1918) + userDefinedNetworkIPv4Subnet = "172.16.0.0/16" // first subnet in private range 172.16.0.0/12 (rfc1918) userDefinedNetworkIPv6Subnet = "2014:100:200::0/60" nadName = "gryffindor" ) diff --git a/test/e2e/network_segmentation_policy.go b/test/e2e/network_segmentation_policy.go index 2da9ef862e..44f47598b9 100644 --- a/test/e2e/network_segmentation_policy.go +++ b/test/e2e/network_segmentation_policy.go @@ -27,13 +27,13 @@ var _ = ginkgo.Describe("Network Segmentation: Network Policies", feature.Networ ginkgo.Context("on a user defined primary network", func() { const ( nadName = "tenant-red" - userDefinedNetworkIPv4Subnet = "172.31.0.0/16" // last subnet in private range 172.16.0.0/12 (rfc1918) + userDefinedNetworkIPv4Subnet = "172.16.0.0/16" // first subnet in private range 172.16.0.0/12 (rfc1918) userDefinedNetworkIPv6Subnet = "2014:100:200::0/60" - customL2IPv4Gateway = "172.31.0.3" + customL2IPv4Gateway = "172.16.0.3" customL2IPv6Gateway = "2014:100:200::3" - customL2IPv4ReservedCIDR = "172.31.1.0/24" + customL2IPv4ReservedCIDR = "172.16.1.0/24" customL2IPv6ReservedCIDR = "2014:100:200::100/120" - customL2IPv4InfraCIDR = "172.31.0.0/30" + customL2IPv4InfraCIDR = "172.16.0.0/30" customL2IPv6InfraCIDR = "2014:100:200::/122" nodeHostnameKey = "kubernetes.io/hostname" workerOneNodeName = "ovn-worker" diff --git a/test/e2e/network_segmentation_preconfigured_layer2.go b/test/e2e/network_segmentation_preconfigured_layer2.go index 9ea09ef63d..3a67ecb3ba 100644 --- a/test/e2e/network_segmentation_preconfigured_layer2.go +++ b/test/e2e/network_segmentation_preconfigured_layer2.go @@ -108,32 +108,32 @@ var _ = 
Describe("Network Segmentation: Preconfigured Layer2 UDN", feature.Netwo netConfig: &networkAttachmentConfigParams{ name: "custom-l2-net", topology: "layer2", - cidr: joinStrings("172.31.0.0/16", "2014:100:200::0/60"), + cidr: joinStrings("172.16.0.0/16", "2014:100:200::0/60"), role: "primary", }, - expectedGatewayIPs: []string{"172.31.0.1", "2014:100:200::1"}, + expectedGatewayIPs: []string{"172.16.0.1", "2014:100:200::1"}, }), Entry("Layer2 with custom subnets", testConfig{ netConfig: &networkAttachmentConfigParams{ name: "custom-l2-net", topology: "layer2", - cidr: joinStrings("172.31.0.0/16", "2014:100:200::0/60"), + cidr: joinStrings("172.16.0.0/16", "2014:100:200::0/60"), role: "primary", - defaultGatewayIPs: joinStrings("172.31.0.10", "2014:100:200::100"), - reservedCIDRs: joinStrings("172.31.1.0/24", "2014:100:200::/122"), - infrastructureCIDRs: joinStrings("172.31.0.8/30", "2014:100:200::100/122"), + defaultGatewayIPs: joinStrings("172.16.0.10", "2014:100:200::100"), + reservedCIDRs: joinStrings("172.16.1.0/24", "2014:100:200::/122"), + infrastructureCIDRs: joinStrings("172.16.0.8/30", "2014:100:200::100/122"), }, - expectedGatewayIPs: []string{"172.31.0.10", "2014:100:200::100"}, + expectedGatewayIPs: []string{"172.16.0.10", "2014:100:200::100"}, }), Entry("Layer2 with inverted gateway/management IPs", testConfig{ netConfig: &networkAttachmentConfigParams{ name: "inv-gateway-net", topology: "layer2", - cidr: joinStrings("172.31.0.0/16", "2014:100:200::0/60"), + cidr: joinStrings("172.16.0.0/16", "2014:100:200::0/60"), role: "primary", - defaultGatewayIPs: joinStrings("172.31.0.2", "2014:100:200::2"), + defaultGatewayIPs: joinStrings("172.16.0.2", "2014:100:200::2"), }, - expectedGatewayIPs: []string{"172.31.0.2", "2014:100:200::2"}, + expectedGatewayIPs: []string{"172.16.0.2", "2014:100:200::2"}, }), ) @@ -162,9 +162,9 @@ var _ = Describe("Network Segmentation: Preconfigured Layer2 UDN", feature.Netwo netConfig: &networkAttachmentConfigParams{ name: "invalid-l2-net-reserved-subnets", topology: "layer2", - cidr: "172.31.0.0/16", + cidr: "172.16.0.0/16", role: "primary", - reservedCIDRs: "172.31.0.10/30", + reservedCIDRs: "172.16.0.10/30", }, expectedError: ContainSubstring( "Invalid value: \"object\": reservedSubnets must be a masked network address (no host bits set)", @@ -186,9 +186,9 @@ var _ = Describe("Network Segmentation: Preconfigured Layer2 UDN", feature.Netwo netConfig: &networkAttachmentConfigParams{ name: "invalid-l2-net-infra-subnets", topology: "layer2", - cidr: "172.31.0.0/16", + cidr: "172.16.0.0/16", role: "primary", - infrastructureCIDRs: "172.31.0.10/30", + infrastructureCIDRs: "172.16.0.10/30", }, expectedError: ContainSubstring( "Invalid value: \"object\": infrastructureSubnets must be a masked network address (no host bits set)", diff --git a/test/e2e/network_segmentation_services.go b/test/e2e/network_segmentation_services.go index 29e9b730e3..124f206fc5 100644 --- a/test/e2e/network_segmentation_services.go +++ b/test/e2e/network_segmentation_services.go @@ -41,13 +41,13 @@ var _ = Describe("Network Segmentation: services", feature.NetworkSegmentation, nadName = "tenant-red" servicePort = 88 serviceTargetPort = 80 - userDefinedNetworkIPv4Subnet = "172.31.0.0/16" // last subnet in private range 172.16.0.0/12 (rfc1918) + userDefinedNetworkIPv4Subnet = "172.16.0.0/16" // first subnet in private range 172.16.0.0/12 (rfc1918) userDefinedNetworkIPv6Subnet = "2014:100:200::0/60" - customL2IPv4Gateway = "172.31.0.3" + customL2IPv4Gateway = "172.16.0.3" 
customL2IPv6Gateway = "2014:100:200::3" - customL2IPv4ReservedCIDR = "172.31.1.0/24" + customL2IPv4ReservedCIDR = "172.16.1.0/24" customL2IPv6ReservedCIDR = "2014:100:200::100/120" - customL2IPv4InfraCIDR = "172.31.0.0/30" + customL2IPv4InfraCIDR = "172.16.0.0/30" customL2IPv6InfraCIDR = "2014:100:200::/122" ) diff --git a/test/e2e/node_shutdown_startup.go b/test/e2e/node_shutdown_startup.go new file mode 100644 index 0000000000..93e9641e4d --- /dev/null +++ b/test/e2e/node_shutdown_startup.go @@ -0,0 +1,243 @@ +package e2e + +import ( + "context" + "encoding/json" + "fmt" + "strings" + "time" + + "github.com/onsi/ginkgo/v2" + + "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/kubernetes/test/e2e/framework" + e2enode "k8s.io/kubernetes/test/e2e/framework/node" + e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" +) + +var _ = ginkgo.Describe("Node Shutdown and Startup", ginkgo.Serial, func() { + const ( + nodeShutdownTimeout = 5 * time.Minute + nodeStartupTimeout = 10 * time.Minute + ) + + var ( + f *framework.Framework + testNodeName string + ) + + f = wrappedTestFramework("node-shutdown-startup") + + ginkgo.BeforeEach(func() { + testNodeName = "" + // Skip test if not using kind provider + if infraprovider.Get().Name() != "kind" { + e2eskipper.Skipf("Node shutdown/startup test only supported for kind provider, got: %s", infraprovider.Get().Name()) + } + + // Get a worker node for testing (skip master/control-plane nodes) + nodes, err := e2enode.GetReadySchedulableNodes(context.TODO(), f.ClientSet) + framework.ExpectNoError(err, "Failed to get ready schedulable nodes") + + if len(nodes.Items) < 2 { + e2eskipper.Skipf("Test requires at least 2 nodes, found %d", len(nodes.Items)) + } + + // Find a worker node (not master/control-plane) + for _, node := range nodes.Items { + if !isControlPlaneNode(node) { + testNodeName = node.Name + break + } + } + + if testNodeName == "" { + e2eskipper.Skipf("No worker nodes found for testing") + } + + framework.Logf("Using node %s for shutdown/startup test", testNodeName) + }) + + ginkgo.It("should maintain cluster health after node shutdown and startup", func() { + err := waitOVNKubernetesHealthy(f) + framework.ExpectNoError(err, "OVN-Kubernetes cluster should be healthy initially") + + ginkgo.By("Check breth0 IP address families before shutdown") + initialIPFamilies, err := getBridgeIPAddressFamilies(testNodeName) + framework.ExpectNoError(err, "Should be able to get breth0 IP address families before shutdown") + framework.Logf("Node %s breth0 initial IP families: IPv4=%v, IPv6=%v", testNodeName, initialIPFamilies.hasIPv4, initialIPFamilies.hasIPv6) + if !initialIPFamilies.hasIPv4 && !initialIPFamilies.hasIPv6 { + framework.Failf("breth0 should have at least one IP address family (IPv4 or IPv6) before shutdown, but found IPv4=%v, IPv6=%v", initialIPFamilies.hasIPv4, initialIPFamilies.hasIPv6) + } + + ginkgo.By("Shut down the node") + framework.Logf("Shutting down node %s", testNodeName) + err = infraprovider.Get().ShutdownNode(testNodeName) + framework.ExpectNoError(err, "Failed to shutdown node %s", testNodeName) + + // Ensure node is started back up regardless of test failure + defer func() { + // If the test failed, dump container logs from the node before cleanup + if ginkgo.CurrentSpecReport().Failed() { + framework.Logf("Test failed, dumping container logs from node %s", testNodeName) + dumpContainerLogsFromNode(testNodeName) + } + + 
framework.Logf("Ensuring node %s is started (cleanup)", testNodeName) + if startErr := infraprovider.Get().StartNode(testNodeName); startErr != nil { + framework.Logf("Failed to start node %s during cleanup: %v", testNodeName, startErr) + } else { + // Wait for the node to become Ready after startup in cleanup + framework.Logf("Waiting for node %s to become Ready after cleanup startup", testNodeName) + waitForNodeReadyState(f, testNodeName, nodeStartupTimeout, true) + } + }() + + // Wait for the node to be marked as NotReady + ginkgo.By("Waiting for node to be marked as NotReady") + waitForNodeReadyState(f, testNodeName, nodeShutdownTimeout, false) + + ginkgo.By("Start the node") + framework.Logf("Starting node %s", testNodeName) + err = infraprovider.Get().StartNode(testNodeName) + framework.ExpectNoError(err, "Failed to start node %s", testNodeName) + + // Wait for the node to become Ready again + ginkgo.By("Waiting for node to become Ready") + waitForNodeReadyState(f, testNodeName, nodeStartupTimeout, true) + + ginkgo.By("Confirm that ovn-k cluster is back to healthy after all services are settled") + err = waitOVNKubernetesHealthy(f) + framework.ExpectNoError(err, "OVN-Kubernetes cluster should be healthy after node restart") + + ginkgo.By("Confirm that breth0 on the node has IP addresses of expected families (that were moved from eth0)") + err = checkBridgeIPAddressFamilies(testNodeName, initialIPFamilies) + framework.ExpectNoError(err, "breth0 should have IP addresses of the same families as before restart") + + framework.Logf("Node shutdown/startup test completed successfully for node %s", testNodeName) + }) +}) + +// isControlPlaneNode checks if a node is a control plane (master) node +func isControlPlaneNode(node corev1.Node) bool { + // Check for common control plane labels and taints + if _, exists := node.Labels["node-role.kubernetes.io/master"]; exists { + return true + } + if _, exists := node.Labels["node-role.kubernetes.io/control-plane"]; exists { + return true + } + + // Check for control plane taints + for _, taint := range node.Spec.Taints { + if taint.Key == "node-role.kubernetes.io/master" || + taint.Key == "node-role.kubernetes.io/control-plane" { + return true + } + } + + return false +} + +// ipAddressFamilies represents which IP address families are present on an interface +type ipAddressFamilies struct { + hasIPv4 bool + hasIPv6 bool +} + +// getBridgeIPAddressFamilies checks which IP address families are present on breth0 interface +func getBridgeIPAddressFamilies(nodeName string) (ipAddressFamilies, error) { + // TODO: change name of the bridge if running on non-kind clusters + stdout, err := infraprovider.Get().ExecK8NodeCommand(nodeName, []string{"ip", "addr", "show", "breth0"}) + if err != nil { + return ipAddressFamilies{}, fmt.Errorf("failed to get breth0 interface info on node %s: %v", nodeName, err) + } + + families := ipAddressFamilies{ + hasIPv4: strings.Contains(stdout, "inet "), + hasIPv6: strings.Contains(stdout, "inet6 "), + } + + if !families.hasIPv4 && !families.hasIPv6 { + return families, fmt.Errorf("breth0 interface on node %s has no IP addresses (neither IPv4 nor IPv6)", nodeName) + } + + return families, nil +} + +// checkBridgeIPAddressFamilies verifies that breth0 interface has IP addresses of the expected families +func checkBridgeIPAddressFamilies(nodeName string, expectedFamilies ipAddressFamilies) error { + return wait.PollImmediate(2*time.Second, 60*time.Second, func() (bool, error) { + currentFamilies, err := 
getBridgeIPAddressFamilies(nodeName) + if err != nil { + framework.Logf("Error checking breth0 IP families on node %s: %v", nodeName, err) + return false, nil + } + + // Check if current families match expected families + if currentFamilies.hasIPv4 == expectedFamilies.hasIPv4 && currentFamilies.hasIPv6 == expectedFamilies.hasIPv6 { + framework.Logf("Node %s breth0 has expected IP address families: IPv4=%v, IPv6=%v", + nodeName, currentFamilies.hasIPv4, currentFamilies.hasIPv6) + return true, nil + } + + framework.Logf("Node %s breth0 IP families do not match yet - Current: IPv4=%v, IPv6=%v; Expected: IPv4=%v, IPv6=%v", + nodeName, currentFamilies.hasIPv4, currentFamilies.hasIPv6, expectedFamilies.hasIPv4, expectedFamilies.hasIPv6) + return false, nil + }) +} + +// dumpContainerLogsFromNode dumps logs of all containers on the specified node using crictl +func dumpContainerLogsFromNode(nodeName string) { + framework.Logf("Dumping container logs from node %s", nodeName) + + // First, get list of all containers + containersOutput, err := infraprovider.Get().ExecK8NodeCommand(nodeName, []string{"crictl", "ps", "-a", "-o", "json"}) + if err != nil { + framework.Logf("Failed to list containers on node %s: %v", nodeName, err) + return + } + + framework.Logf("Container list output from node %s:\n%s", nodeName, containersOutput) + + // Parse the JSON to get individual container IDs and names + type Container struct { + ID string `json:"id"` + Metadata struct { + Name string `json:"name"` + } `json:"metadata"` + } + type ContainersList struct { + Containers []Container `json:"containers"` + } + + var containersList ContainersList + if err := json.Unmarshal([]byte(containersOutput), &containersList); err != nil { + framework.Logf("Failed to parse containers JSON from node %s: %v", nodeName, err) + // Fallback: try to extract container IDs using crictl ps without JSON + simpleOutput, err := infraprovider.Get().ExecK8NodeCommand(nodeName, []string{"crictl", "ps", "-a"}) + if err != nil { + framework.Logf("Failed to list containers (simple format) on node %s: %v", nodeName, err) + return + } + framework.Logf("Container list (simple format) from node %s:\n%s", nodeName, simpleOutput) + return + } + + // Dump logs for each container + for _, container := range containersList.Containers { + framework.Logf("Dumping logs for container %s (%s) on node %s", container.Metadata.Name, container.ID, nodeName) + + logs, err := infraprovider.Get().ExecK8NodeCommand(nodeName, []string{"crictl", "logs", "--tail=100", container.ID}) + if err != nil { + framework.Logf("Failed to get logs for container %s (%s) on node %s: %v", container.Metadata.Name, container.ID, nodeName, err) + continue + } + + framework.Logf("=== Logs for container %s (%s) on node %s ===\n%s\n=== End logs ===", + container.Metadata.Name, container.ID, nodeName, logs) + } +} diff --git a/test/e2e/testscenario/clusternetworkconnect/invalid-scenarios.go b/test/e2e/testscenario/clusternetworkconnect/invalid-scenarios.go index 36588d6d2e..4e3a34d00f 100644 --- a/test/e2e/testscenario/clusternetworkconnect/invalid-scenarios.go +++ b/test/e2e/testscenario/clusternetworkconnect/invalid-scenarios.go @@ -479,4 +479,54 @@ spec: connectivity: ["PodNetwork"] `, }, + + // CEL validation (dual-stack networkPrefix host bits must match) + { + Description: "dual-stack with mismatched networkPrefix host bits - IPv4 /24 vs IPv6 /64", + ExpectedErr: "networkPrefix must have matching host bits", + Manifest: ` +apiVersion: k8s.ovn.org/v1 +kind: ClusterNetworkConnect 
+metadata: + name: mismatched-host-bits-v4-24-v6-64 +spec: + networkSelectors: + - networkSelectionType: "ClusterUserDefinedNetworks" + clusterUserDefinedNetworkSelector: + networkSelector: + matchLabels: + name: test + connectSubnets: + - cidr: "192.168.0.0/16" + networkPrefix: 24 + - cidr: "fd01::/48" + networkPrefix: 64 + connectivity: ["PodNetwork"] +`, + // IPv4: 32-24=8 host bits, IPv6: 128-64=64 host bits - MISMATCH! + }, + { + Description: "dual-stack with mismatched networkPrefix host bits - IPv4 /28 vs IPv6 /123", + ExpectedErr: "networkPrefix must have matching host bits", + Manifest: ` +apiVersion: k8s.ovn.org/v1 +kind: ClusterNetworkConnect +metadata: + name: mismatched-host-bits-v4-28-v6-123 +spec: + networkSelectors: + - networkSelectionType: "ClusterUserDefinedNetworks" + clusterUserDefinedNetworkSelector: + networkSelector: + matchLabels: + name: test + connectSubnets: + - cidr: "192.168.0.0/24" + networkPrefix: 28 + - cidr: "fd01::/120" + networkPrefix: 123 + connectivity: ["PodNetwork"] +`, + // IPv4: 32-28=4 host bits, IPv6: 128-123=5 host bits - MISMATCH! + }, } diff --git a/test/e2e/testscenario/clusternetworkconnect/valid-scenarios.go b/test/e2e/testscenario/clusternetworkconnect/valid-scenarios.go index 931eb99dfa..c95a53c80d 100644 --- a/test/e2e/testscenario/clusternetworkconnect/valid-scenarios.go +++ b/test/e2e/testscenario/clusternetworkconnect/valid-scenarios.go @@ -44,11 +44,11 @@ spec: `, }, { - Description: "valid dual-stack CIDRs", + Description: "valid dual-stack CIDRs with matching host bits", Manifest: ` apiVersion: k8s.ovn.org/v1 kind: ClusterNetworkConnect -metadata: +metadata: name: valid-dual-stack spec: networkSelectors: @@ -60,8 +60,8 @@ spec: connectSubnets: - cidr: "192.168.0.0/16" networkPrefix: 24 - - cidr: "fd01::/64" - networkPrefix: 96 + - cidr: "fd01::/112" + networkPrefix: 120 connectivity: ["PodNetwork"] `, }, @@ -107,8 +107,8 @@ spec: connectSubnets: - cidr: "192.168.0.0/16" networkPrefix: 24 - - cidr: "fd01::/64" - networkPrefix: 96 + - cidr: "fd01::/112" + networkPrefix: 120 connectivity: ["PodNetwork", "ClusterIPServiceNetwork"] `, }, diff --git a/test/e2e/testscenario/cudn/invalid-scenarios-no-overlay.go b/test/e2e/testscenario/cudn/invalid-scenarios-no-overlay.go new file mode 100644 index 0000000000..3f52313a8d --- /dev/null +++ b/test/e2e/testscenario/cudn/invalid-scenarios-no-overlay.go @@ -0,0 +1,139 @@ +package cudn + +import "github.com/ovn-org/ovn-kubernetes/test/e2e/testscenario" + +var NoOverlayInvalid = []testscenario.ValidateCRScenario{ + { + Description: "NoOverlay transport is only supported for Layer3 primary networks - Layer2 network", + ExpectedErr: `transport 'NoOverlay' is only supported for Layer3 primary networks`, + Manifest: ` +apiVersion: k8s.ovn.org/v1 +kind: ClusterUserDefinedNetwork +metadata: + name: no-overlay-layer2-fail +spec: + namespaceSelector: {matchLabels: {kubernetes.io/metadata.name: red}} + network: + topology: Layer2 + layer2: + role: Primary + subnets: + - 10.10.0.0/16 + transport: NoOverlay + noOverlayOptions: + outboundSNAT: Enabled + routing: Managed +`, + }, + { + Description: "NoOverlay transport is only supported for Layer3 primary networks - Layer3 secondary network", + ExpectedErr: `transport 'NoOverlay' is only supported for Layer3 primary networks`, + Manifest: ` +apiVersion: k8s.ovn.org/v1 +kind: ClusterUserDefinedNetwork +metadata: + name: no-overlay-layer3-secondary-fail +spec: + namespaceSelector: {matchLabels: {kubernetes.io/metadata.name: red}} + network: + topology: Layer3 + 
layer3: + role: Secondary + subnets: + - cidr: 10.10.0.0/16 + hostSubnet: 24 + transport: NoOverlay + noOverlayOptions: + outboundSNAT: Enabled + routing: Managed +`, + }, + { + Description: "NoOverlay transport is only supported for Layer3 primary networks - Localnet network", + ExpectedErr: `transport 'NoOverlay' is only supported for Layer3 primary networks`, + Manifest: ` +apiVersion: k8s.ovn.org/v1 +kind: ClusterUserDefinedNetwork +metadata: + name: no-overlay-localnet-fail +spec: + namespaceSelector: {matchLabels: {kubernetes.io/metadata.name: red}} + network: + topology: Localnet + localnet: + role: Secondary + physicalNetworkName: physnet1 + subnets: + - 10.10.0.0/16 + transport: NoOverlay + noOverlayOptions: + outboundSNAT: Enabled + routing: Managed +`, + }, + { + Description: "noOverlayOptions is required when transport is NoOverlay", + ExpectedErr: `noOverlayOptions is required when transport is 'NoOverlay'`, + Manifest: ` +apiVersion: k8s.ovn.org/v1 +kind: ClusterUserDefinedNetwork +metadata: + name: no-overlay-missing-options-fail +spec: + namespaceSelector: {matchLabels: {kubernetes.io/metadata.name: red}} + network: + topology: Layer3 + layer3: + role: Primary + subnets: + - cidr: 10.10.0.0/16 + hostSubnet: 24 + transport: NoOverlay +`, + }, + { + Description: "noOverlayOptions is forbidden when transport is Geneve", + ExpectedErr: `noOverlayOptions is forbidden when transport is not 'NoOverlay'`, + Manifest: ` +apiVersion: k8s.ovn.org/v1 +kind: ClusterUserDefinedNetwork +metadata: + name: no-overlay-options-with-geneve-fail +spec: + namespaceSelector: {matchLabels: {kubernetes.io/metadata.name: red}} + network: + topology: Layer3 + layer3: + role: Primary + subnets: + - cidr: 10.10.0.0/16 + hostSubnet: 24 + transport: Geneve + noOverlayOptions: + outboundSNAT: Enabled + routing: Managed +`, + }, + { + Description: "noOverlayOptions is forbidden when transport is not set (defaults to Geneve)", + ExpectedErr: `noOverlayOptions is forbidden when transport is not 'NoOverlay'`, + Manifest: ` +apiVersion: k8s.ovn.org/v1 +kind: ClusterUserDefinedNetwork +metadata: + name: no-overlay-options-without-transport-fail +spec: + namespaceSelector: {matchLabels: {kubernetes.io/metadata.name: red}} + network: + topology: Layer3 + layer3: + role: Primary + subnets: + - cidr: 10.10.0.0/16 + hostSubnet: 24 + noOverlayOptions: + outboundSNAT: Enabled + routing: Managed +`, + }, +} diff --git a/test/e2e/testscenario/cudn/valid-scenarios-no-overlay.go b/test/e2e/testscenario/cudn/valid-scenarios-no-overlay.go new file mode 100644 index 0000000000..260fa68fae --- /dev/null +++ b/test/e2e/testscenario/cudn/valid-scenarios-no-overlay.go @@ -0,0 +1,162 @@ +package cudn + +import "github.com/ovn-org/ovn-kubernetes/test/e2e/testscenario" + +var NoOverlayValid = []testscenario.ValidateCRScenario{ + { + Description: "NoOverlay transport with managed routing and enabled SNAT", + Manifest: ` +apiVersion: k8s.ovn.org/v1 +kind: ClusterUserDefinedNetwork +metadata: + name: no-overlay-managed-enabled-snat +spec: + namespaceSelector: {matchLabels: {kubernetes.io/metadata.name: red}} + network: + topology: Layer3 + layer3: + role: Primary + mtu: 1500 + subnets: + - cidr: 10.10.0.0/16 + hostSubnet: 24 + transport: NoOverlay + noOverlayOptions: + outboundSNAT: Enabled + routing: Managed +`, + }, + { + Description: "NoOverlay transport with unmanaged routing and disabled SNAT", + Manifest: ` +apiVersion: k8s.ovn.org/v1 +kind: ClusterUserDefinedNetwork +metadata: + name: no-overlay-unmanaged-disabled-snat +spec: + 
namespaceSelector: {matchLabels: {kubernetes.io/metadata.name: blue}} + network: + topology: Layer3 + layer3: + role: Primary + mtu: 1500 + subnets: + - cidr: 10.20.0.0/16 + hostSubnet: 24 + transport: NoOverlay + noOverlayOptions: + outboundSNAT: Disabled + routing: Unmanaged +`, + }, + { + Description: "NoOverlay transport with managed routing and disabled SNAT", + Manifest: ` +apiVersion: k8s.ovn.org/v1 +kind: ClusterUserDefinedNetwork +metadata: + name: no-overlay-managed-disabled-snat +spec: + namespaceSelector: {matchLabels: {kubernetes.io/metadata.name: green}} + network: + topology: Layer3 + layer3: + role: Primary + mtu: 1500 + subnets: + - cidr: 10.30.0.0/16 + hostSubnet: 24 + transport: NoOverlay + noOverlayOptions: + outboundSNAT: Disabled + routing: Managed +`, + }, + { + Description: "NoOverlay transport with unmanaged routing and enabled SNAT", + Manifest: ` +apiVersion: k8s.ovn.org/v1 +kind: ClusterUserDefinedNetwork +metadata: + name: no-overlay-unmanaged-enabled-snat +spec: + namespaceSelector: {matchLabels: {kubernetes.io/metadata.name: yellow}} + network: + topology: Layer3 + layer3: + role: Primary + mtu: 1500 + subnets: + - cidr: 10.40.0.0/16 + hostSubnet: 24 + transport: NoOverlay + noOverlayOptions: + outboundSNAT: Enabled + routing: Unmanaged +`, + }, + { + Description: "NoOverlay transport with dual-stack subnets", + Manifest: ` +apiVersion: k8s.ovn.org/v1 +kind: ClusterUserDefinedNetwork +metadata: + name: no-overlay-dual-stack +spec: + namespaceSelector: {matchLabels: {kubernetes.io/metadata.name: purple}} + network: + topology: Layer3 + layer3: + role: Primary + mtu: 1500 + subnets: + - cidr: 10.50.0.0/16 + hostSubnet: 24 + - cidr: fd00:10:50::/48 + hostSubnet: 64 + transport: NoOverlay + noOverlayOptions: + outboundSNAT: Enabled + routing: Managed +`, + }, + { + Description: "Layer3 primary network with default Geneve transport (no transport field set)", + Manifest: ` +apiVersion: k8s.ovn.org/v1 +kind: ClusterUserDefinedNetwork +metadata: + name: geneve-default-transport +spec: + namespaceSelector: {matchLabels: {kubernetes.io/metadata.name: orange}} + network: + topology: Layer3 + layer3: + role: Primary + mtu: 1400 + subnets: + - cidr: 10.60.0.0/16 + hostSubnet: 24 +`, + }, + { + Description: "Layer3 primary network with explicit Geneve transport", + Manifest: ` +apiVersion: k8s.ovn.org/v1 +kind: ClusterUserDefinedNetwork +metadata: + name: geneve-explicit-transport +spec: + namespaceSelector: {matchLabels: {kubernetes.io/metadata.name: cyan}} + network: + topology: Layer3 + layer3: + role: Primary + mtu: 1400 + subnets: + - cidr: 10.70.0.0/16 + hostSubnet: 24 + transport: Geneve +`, + }, +} diff --git a/test/e2e/util.go b/test/e2e/util.go index c4c2808c7b..d23208bc31 100644 --- a/test/e2e/util.go +++ b/test/e2e/util.go @@ -1138,14 +1138,37 @@ func isDualStackCluster(nodes *v1.NodeList) bool { // used to inject OVN specific test actions func wrappedTestFramework(basename string) *framework.Framework { f := newPrivelegedTestFramework(basename) - // inject dumping dbs on failure ginkgo.JustAfterEach(func() { - if !ginkgo.CurrentSpecReport().Failed() { + logLocation := "/var/log" + coredumpDir := "/tmp/kind/logs/coredumps" + dbLocation := "/var/lib/openvswitch" + // https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5782 + skippedCoredumps := []string{"zebra", "bgpd", "mgmtd"} + + // Check for coredumps on host + var coredumpFiles []string + files, err := os.ReadDir(coredumpDir) + if err == nil { + for _, file := range files { + if file.IsDir() { + 
continue + } + fileName := file.Name() + if slices.ContainsFunc(skippedCoredumps, func(s string) bool { + return strings.Contains(fileName, s) + }) { + framework.Logf("Ignoring coredump for skipped process: %s", fileName) + continue + } + coredumpFiles = append(coredumpFiles, fileName) + } + } + + // If coredumps found OR test already failed, collect dbs + if len(coredumpFiles) == 0 && !ginkgo.CurrentSpecReport().Failed() { return } - logLocation := "/var/log" - dbLocation := "/var/lib/openvswitch" // Potential database locations ovsdbLocations := []string{"/etc/origin/openvswitch", "/etc/openvswitch"} dbs := []string{"ovnnb_db.db", "ovnsb_db.db"} @@ -1183,6 +1206,11 @@ func wrappedTestFramework(basename string) *framework.Framework { } } } + + // Abort testing if any coredump found + if len(coredumpFiles) != 0 { + ginkgo.AbortSuite(fmt.Sprintf("Coredumps found during test execution: %s", strings.Join(coredumpFiles, ", "))) + } }) return f @@ -1314,6 +1342,10 @@ func randStr(n int) string { func isCIDRIPFamilySupported(cs kubernetes.Interface, cidr string) bool { ginkgo.GinkgoHelper() gomega.Expect(cidr).To(gomega.ContainSubstring("/")) + // if cidr in format 2010:100:200::0/60/64, trim to 2010:100:200::0/60 + if tokens := strings.Split(cidr, "/"); len(tokens) == 3 { + cidr = fmt.Sprintf(`%s/%s`, tokens[0], tokens[1]) + } isIPv6 := utilnet.IsIPv6CIDRString(cidr) return (isIPv4Supported(cs) && !isIPv6) || (isIPv6Supported(cs) && isIPv6) } @@ -1862,3 +1894,126 @@ func findOVNDBLeaderPod(f *framework.Framework, cs clientset.Interface, namespac return nil, fmt.Errorf("no nbdb leader pod found among %d ovnkube-db pods", len(dbPods.Items)) } + +// waitOVNKubernetesHealthy waits for the ovn-kubernetes cluster to be healthy +// This includes checking that all nodes are ready, all ovnkube-node pods are running, +// and all ovnkube-master/control-plane pods are running +func waitOVNKubernetesHealthy(f *framework.Framework) error { + return wait.PollImmediate(5*time.Second, 300*time.Second, func() (bool, error) { + // Check that all nodes are ready and schedulable + nodes, err := e2enode.GetReadySchedulableNodes(context.TODO(), f.ClientSet) + if err != nil { + framework.Logf("Error getting ready schedulable nodes: %v", err) + return false, nil + } + + framework.Logf("Found %d ready schedulable nodes", len(nodes.Items)) + + // Check ovnkube-node pods + podClient := f.ClientSet.CoreV1().Pods(deploymentconfig.Get().OVNKubernetesNamespace()) + ovnNodePods, err := podClient.List(context.Background(), metav1.ListOptions{ + LabelSelector: "app=ovnkube-node", + }) + if err != nil { + framework.Logf("Error listing ovnkube-node pods: %v", err) + return false, nil + } + + expectedNodePods := len(nodes.Items) + if len(ovnNodePods.Items) != expectedNodePods { + framework.Logf("Expected %d ovnkube-node pods, found %d", expectedNodePods, len(ovnNodePods.Items)) + return false, nil + } + + // Check that all ovnkube-node pods are running and ready + for _, pod := range ovnNodePods.Items { + isReady, err := testutils.PodRunningReady(&pod) + if err != nil { + framework.Logf("Error checking if ovnkube-node pod %s is ready: %v", pod.Name, err) + return false, nil + } + if !isReady { + framework.Logf("ovnkube-node pod %s is not running and ready (phase: %s)", pod.Name, pod.Status.Phase) + return false, nil + } + } + + // Check ovnkube-master/control-plane pods + ovnMasterPods, err := podClient.List(context.Background(), metav1.ListOptions{ + LabelSelector: "name=ovnkube-master", + }) + if err != nil { + 
framework.Logf("Error listing ovnkube-master pods: %v", err) + return false, nil + } + + // If no ovnkube-master pods, check for ovnkube-control-plane + if len(ovnMasterPods.Items) == 0 { + ovnMasterPods, err = podClient.List(context.Background(), metav1.ListOptions{ + LabelSelector: "name=ovnkube-control-plane", + }) + if err != nil { + framework.Logf("Error listing ovnkube-control-plane pods: %v", err) + return false, nil + } + } + + if len(ovnMasterPods.Items) == 0 { + framework.Logf("No ovnkube-master or ovnkube-control-plane pods found") + return false, nil + } + + // Check that at least one master/control-plane pod is running and ready + runningMasterPods := 0 + for _, pod := range ovnMasterPods.Items { + isReady, err := testutils.PodRunningReady(&pod) + if err != nil { + framework.Logf("Error checking if ovnkube-master pod %s is ready: %v", pod.Name, err) + continue + } + if isReady { + runningMasterPods++ + } + } + + if runningMasterPods == 0 { + framework.Logf("No ovnkube-master/control-plane pods are running") + return false, nil + } + + framework.Logf("OVN-Kubernetes cluster is healthy: %d nodes, %d ovnkube-node pods, %d running master pods", + len(nodes.Items), len(ovnNodePods.Items), runningMasterPods) + return true, nil + }) +} + +// waitForNodeReadyState waits for the specified node to reach the desired Ready state within the given timeout +func waitForNodeReadyState(f *framework.Framework, nodeName string, timeout time.Duration, desiredReady bool) { + var stateDescription, expectationMessage string + if desiredReady { + stateDescription = "Ready" + expectationMessage = "Node should become Ready after startup" + } else { + stateDescription = "NotReady" + expectationMessage = "Node should become NotReady after shutdown" + } + + gomega.Eventually(func() bool { + node, err := f.ClientSet.CoreV1().Nodes().Get(context.Background(), nodeName, metav1.GetOptions{}) + if err != nil { + framework.Logf("Error getting node %s: %v", nodeName, err) + return false + } + + for _, condition := range node.Status.Conditions { + if condition.Type == v1.NodeReady { + isReady := condition.Status == v1.ConditionTrue + if isReady == desiredReady { + framework.Logf("Node %s is now %s", nodeName, stateDescription) + return true + } + } + } + return false + }, timeout, 10*time.Second).Should(gomega.BeTrue(), expectationMessage) +} diff --git a/test/scripts/e2e-cp.sh b/test/scripts/e2e-cp.sh index 1e24bdcac4..a786f22f0f 100755 --- a/test/scripts/e2e-cp.sh +++ b/test/scripts/e2e-cp.sh @@ -40,12 +40,19 @@ skip() { SKIPPED_TESTS+=$* } -SKIPPED_LABELED_TESTS="" +LABELED_TESTS="" skip_label() { - if [ "$SKIPPED_LABELED_TESTS" != "" ]; then - SKIPPED_LABELED_TESTS+=" && " + if [ "$LABELED_TESTS" != "" ]; then + LABELED_TESTS+=" && " fi - SKIPPED_LABELED_TESTS+="!($*)" + LABELED_TESTS+="!($*)" +} + +require_label() { + if [ "$LABELED_TESTS" != "" ]; then + LABELED_TESTS+=" && " + fi + LABELED_TESTS+="$*" } if [ "$PLATFORM_IPV4_SUPPORT" == true ]; then @@ -148,6 +155,12 @@ if [[ "${WHAT}" != "${CLUSTER_NETWORK_CONNECT_TESTS}"* ]]; then skip $CLUSTER_NETWORK_CONNECT_TESTS fi +SERIAL_LABEL="Serial" +if [[ "${WHAT}" = "$SERIAL_LABEL" ]]; then + require_label "$SERIAL_LABEL" + shift # don't "focus" on Serial since we filter by label +fi + BGP_TESTS="BGP" if [ "$ENABLE_ROUTE_ADVERTISEMENTS" != true ]; then skip $BGP_TESTS @@ -205,6 +218,13 @@ else fi fi +# if we set PARALLEL=true, skip serial test +if [ "${PARALLEL:-false}" = "true" ]; then + export GINKGO_PARALLEL=y + export GINKGO_PARALLEL_NODES=10 + skip_label 
"$SERIAL_LABEL" +fi + # setting these is required to make RuntimeClass tests work ... :/ export KUBE_CONTAINER_RUNTIME=remote export KUBE_CONTAINER_RUNTIME_ENDPOINT=unix:///run/containerd/containerd.sock @@ -227,7 +247,7 @@ go test -test.timeout ${GO_TEST_TIMEOUT}m -v . \ -ginkgo.timeout ${TEST_TIMEOUT}m \ -ginkgo.flake-attempts ${FLAKE_ATTEMPTS:-2} \ -ginkgo.skip="${SKIPPED_TESTS}" \ - ${SKIPPED_LABELED_TESTS:+-ginkgo.label-filter="${SKIPPED_LABELED_TESTS}"} \ + ${LABELED_TESTS:+-ginkgo.label-filter="${LABELED_TESTS}"} \ -ginkgo.junit-report=${E2E_REPORT_DIR}/junit_${E2E_REPORT_PREFIX}report.xml \ -provider skeleton \ -kubeconfig ${KUBECONFIG} \ diff --git a/test/scripts/upgrade-ovn.sh b/test/scripts/upgrade-ovn.sh index c854accfa2..103b93ff2f 100755 --- a/test/scripts/upgrade-ovn.sh +++ b/test/scripts/upgrade-ovn.sh @@ -149,6 +149,7 @@ create_ovn_kube_manifests() { --ovn-loglevel-controller="${OVN_LOG_LEVEL_CONTROLLER}" \ --egress-ip-enable=true \ --egress-firewall-enable=true \ + --enable-coredumps=true \ --v4-join-subnet="${JOIN_SUBNET_IPV4}" \ --v6-join-subnet="${JOIN_SUBNET_IPV6}" \ --ex-gw-network-interface="${OVN_EX_GW_NETWORK_INTERFACE}" \